OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 #include "webrtc/modules/audio_processing/aec3/echo_remover.h" | 10 #include "webrtc/modules/audio_processing/aec3/echo_remover.h" |
11 | 11 |
| 12 #include <math.h> |
12 #include <algorithm> | 13 #include <algorithm> |
13 #include <memory> | 14 #include <memory> |
14 #include <numeric> | 15 #include <numeric> |
15 #include <string> | 16 #include <string> |
16 | 17 |
17 #include "webrtc/base/array_view.h" | 18 #include "webrtc/base/array_view.h" |
18 #include "webrtc/base/atomicops.h" | 19 #include "webrtc/base/atomicops.h" |
19 #include "webrtc/base/constructormagic.h" | 20 #include "webrtc/base/constructormagic.h" |
20 #include "webrtc/modules/audio_processing/aec3/aec3_common.h" | 21 #include "webrtc/modules/audio_processing/aec3/aec3_common.h" |
21 #include "webrtc/modules/audio_processing/aec3/aec_state.h" | 22 #include "webrtc/modules/audio_processing/aec3/aec_state.h" |
22 #include "webrtc/modules/audio_processing/aec3/comfort_noise_generator.h" | 23 #include "webrtc/modules/audio_processing/aec3/comfort_noise_generator.h" |
23 #include "webrtc/modules/audio_processing/aec3/echo_path_variability.h" | 24 #include "webrtc/modules/audio_processing/aec3/echo_path_variability.h" |
24 #include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h" | 25 #include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h" |
25 #include "webrtc/modules/audio_processing/aec3/fft_data.h" | 26 #include "webrtc/modules/audio_processing/aec3/fft_data.h" |
26 #include "webrtc/modules/audio_processing/aec3/output_selector.h" | 27 #include "webrtc/modules/audio_processing/aec3/output_selector.h" |
27 #include "webrtc/modules/audio_processing/aec3/power_echo_model.h" | |
28 #include "webrtc/modules/audio_processing/aec3/render_buffer.h" | 28 #include "webrtc/modules/audio_processing/aec3/render_buffer.h" |
29 #include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h" | 29 #include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h" |
30 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" | 30 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" |
31 #include "webrtc/modules/audio_processing/aec3/subtractor.h" | 31 #include "webrtc/modules/audio_processing/aec3/subtractor.h" |
32 #include "webrtc/modules/audio_processing/aec3/suppression_filter.h" | 32 #include "webrtc/modules/audio_processing/aec3/suppression_filter.h" |
33 #include "webrtc/modules/audio_processing/aec3/suppression_gain.h" | 33 #include "webrtc/modules/audio_processing/aec3/suppression_gain.h" |
34 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" | 34 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" |
35 | 35 |
36 namespace webrtc { | 36 namespace webrtc { |
37 | 37 |
38 namespace { | 38 namespace { |
39 | 39 |
// Computes the per-bin power of the difference between Y and E:
//   (*S2)[k] = (Y.re[k] - E.re[k])^2 + (Y.im[k] - E.im[k])^2
// for every index k of E.re. The call site in ProcessCapture passes the
// subtractor's E_main and the FFT of the capture block, and stores the result
// in S2_linear.
// S2 must be non-null; it is dereferenced without a check.
// NOTE(review): the loop bound is E.re.size() while *S2 holds
// kFftLengthBy2Plus1 elements - presumably the two are equal; confirm against
// fft_data.h.
// (This function is identical in the OLD and NEW columns of this diff view.)
40 void LinearEchoPower(const FftData& E, | 40 void LinearEchoPower(const FftData& E, |
41 const FftData& Y, | 41 const FftData& Y, |
42 std::array<float, kFftLengthBy2Plus1>* S2) { | 42 std::array<float, kFftLengthBy2Plus1>* S2) { |
43 for (size_t k = 0; k < E.re.size(); ++k) { | 43 for (size_t k = 0; k < E.re.size(); ++k) { |
44 (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) + | 44 (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) + |
45 (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]); | 45 (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]); |
46 } | 46 } |
47 } | 47 } |
48 | 48 |
// Returns the sum of the squared samples of x, accumulated as float starting
// from 0.f.
// This helper appears only in the OLD column; the NEW column is empty on these
// rows, i.e. the function is removed by the change shown here. Its only
// visible caller is the OLD-column doubletalk check in ProcessCapture.
// NOTE(review): x is taken by value, so the whole array is copied per call.
49 float BlockPower(const std::array<float, kBlockSize> x) { | |
50 return std::accumulate(x.begin(), x.end(), 0.f, | |
51 [](float a, float b) -> float { return a + b * b; }); | |
52 } | |
53 | |
54 // Class for removing the echo from the capture signal. | 49 // Class for removing the echo from the capture signal. |
// Concrete, final implementation of the EchoRemover interface. Copy and
// assignment are disabled through RTC_DISALLOW_COPY_AND_ASSIGN.
//
// The private members are the processing stages that ProcessCapture drives:
// the linear subtractor, the suppression gain and filter, the comfort noise
// generator, the render signal analyzer, the output selector, the residual
// echo estimator, the AEC state and the metrics.
//
// Member declaration order is significant: data_dumper_, optimization_ and
// sample_rate_hz_ are declared before the members whose constructor
// initializers read them (subtractor_, suppression_gain_, cng_,
// suppression_filter_), and C++ initializes members in declaration order.
//
// NOTE(review): part of the public section is elided in this view (the
// "skipping 12 matching lines" row), so the full list of public methods is not
// visible here.
// The power_echo_model_ and X_buffer_ members exist only in the OLD column.
55 class EchoRemoverImpl final : public EchoRemover { | 50 class EchoRemoverImpl final : public EchoRemover { |
56 public: | 51 public: |
57 explicit EchoRemoverImpl(int sample_rate_hz); | 52 explicit EchoRemoverImpl(int sample_rate_hz); |
58 ~EchoRemoverImpl() override; | 53 ~EchoRemoverImpl() override; |
59 | 54 |
60 // Removes the echo from a block of samples from the capture signal. The | 55 // Removes the echo from a block of samples from the capture signal. The |
61 // supplied render signal is assumed to be pre-aligned with the capture | 56 // supplied render signal is assumed to be pre-aligned with the capture |
62 // signal. | 57 // signal. |
63 void ProcessCapture( | 58 void ProcessCapture( |
(...skipping 12 matching lines...) Expand all Loading... |
76 private: | 71 private: |
77 static int instance_count_; | 72 static int instance_count_; |
78 const Aec3Fft fft_; | 73 const Aec3Fft fft_; |
79 std::unique_ptr<ApmDataDumper> data_dumper_; | 74 std::unique_ptr<ApmDataDumper> data_dumper_; |
80 const Aec3Optimization optimization_; | 75 const Aec3Optimization optimization_; |
81 const int sample_rate_hz_; | 76 const int sample_rate_hz_; |
82 Subtractor subtractor_; | 77 Subtractor subtractor_; |
83 SuppressionGain suppression_gain_; | 78 SuppressionGain suppression_gain_; |
84 ComfortNoiseGenerator cng_; | 79 ComfortNoiseGenerator cng_; |
85 SuppressionFilter suppression_filter_; | 80 SuppressionFilter suppression_filter_; |
86 PowerEchoModel power_echo_model_; | |
87 RenderBuffer X_buffer_; | |
88 RenderSignalAnalyzer render_signal_analyzer_; | 81 RenderSignalAnalyzer render_signal_analyzer_; |
89 OutputSelector output_selector_; | 82 OutputSelector output_selector_; |
90 ResidualEchoEstimator residual_echo_estimator_; | 83 ResidualEchoEstimator residual_echo_estimator_; |
91 bool echo_leakage_detected_ = false; | 84 bool echo_leakage_detected_ = false; |
92 AecState aec_state_; | 85 AecState aec_state_; |
93 EchoRemoverMetrics metrics_; | 86 EchoRemoverMetrics metrics_; |
94 | 87 |
95 RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl); | 88 RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl); |
96 }; | 89 }; |
97 | 90 |
// Definition of the static instance counter. The constructor increments it
// with rtc::AtomicOps::Increment and passes the incremented value to the
// ApmDataDumper it creates.
98 int EchoRemoverImpl::instance_count_ = 0; | 91 int EchoRemoverImpl::instance_count_ = 0; |
99 | 92 |
// Constructs the echo remover for the given sample rate.
//
// sample_rate_hz: stored in sample_rate_hz_ and used to construct
//   suppression_filter_. It is validated only by
//   RTC_DCHECK(ValidFullBandRate(sample_rate_hz)) in the body, which runs
//   after all member initializers have already consumed the value.
//
// data_dumper_ is created first, receiving the atomically incremented
// instance_count_, because subtractor_ is handed data_dumper_.get().
// optimization_ is detected once via DetectOptimization() and shared by
// subtractor_, suppression_gain_ and cng_.
// Members not named in the initializer list (render_signal_analyzer_,
// output_selector_, residual_echo_estimator_, aec_state_, metrics_) are
// default-constructed.
//
// The X_buffer_ initializer rows exist only in the OLD column; in the NEW
// column the initializer list ends at suppression_filter_.
100 EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz) | 93 EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz) |
101 : fft_(), | 94 : fft_(), |
102 data_dumper_( | 95 data_dumper_( |
103 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), | 96 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
104 optimization_(DetectOptimization()), | 97 optimization_(DetectOptimization()), |
105 sample_rate_hz_(sample_rate_hz), | 98 sample_rate_hz_(sample_rate_hz), |
106 subtractor_(data_dumper_.get(), optimization_), | 99 subtractor_(data_dumper_.get(), optimization_), |
107 suppression_gain_(optimization_), | 100 suppression_gain_(optimization_), |
108 cng_(optimization_), | 101 cng_(optimization_), |
109 suppression_filter_(sample_rate_hz_), | 102 suppression_filter_(sample_rate_hz_) { |
110 X_buffer_(optimization_, | |
111 NumBandsForRate(sample_rate_hz_), | |
112 std::max(subtractor_.MinFarendBufferLength(), | |
113 power_echo_model_.MinFarendBufferLength()), | |
114 subtractor_.NumBlocksInRenderSums()) { | |
115 RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); | 103 RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); |
116 } | 104 } |
117 | 105 |
// Defaulted destructor, defined out of line. No manual cleanup is written
// here; data_dumper_ is held in a std::unique_ptr and the remaining members
// are held by value.
118 EchoRemoverImpl::~EchoRemoverImpl() = default; | 106 EchoRemoverImpl::~EchoRemoverImpl() = default; |
119 | 107 |
// Removes echo from one block of the capture signal, in place.
//
// Parameters:
//   echo_path_delay_samples   - optional delay, forwarded to
//                               aec_state_.Update().
//   echo_path_variability     - when AudioPathChanged() is true, subtractor_
//                               is notified, plus residual_echo_estimator_
//                               (OLD column) or aec_state_ (NEW column). The
//                               OLD column also forwards it to
//                               aec_state_.Update().
//   capture_signal_saturation - forwarded to
//                               aec_state_.UpdateCaptureSaturation().
//   render_buffer             - supplies the most recent render block x and
//                               is passed on to the render signal analyzer,
//                               the subtractor, the AEC state and the
//                               residual echo estimator.
//   capture                   - input and output. Must be non-null, and both
//                               it and x must hold
//                               NumBandsForRate(sample_rate_hz_) bands with
//                               kBlockSize samples in band 0. These
//                               conditions are only checked by RTC_DCHECK.
//
// Processing order, shared by both columns:
//   1. Dump the band-0 capture and render inputs.
//   2. Update the saturation state and react to echo path changes.
//   3. Analyze the render signal and run the linear subtractor on y0.
//   4. Compute the capture spectrum Y, its power Y2, and S2_linear.
//   5. Update aec_state_.
//   6. Form the linear output via output_selector_ and pick E2: E2_main when
//      the subtractor output is used, Y2 otherwise. y0 is presumably
//      rewritten by FormLinearOutput - confirm in output_selector.h.
//   7. Estimate the residual echo power R2 and the comfort noise.
//   8. Compute the suppression gain G and apply it to *y through
//      suppression_filter_.
//   9. Update the metrics and emit the debug dumps.
//
// The statement order matters: aec_state_ is read by later stages after being
// updated in step 5, and the "aec3_output_linear" dump must precede
// suppression while the "aec3_output" dump must follow it.
//
// Differences between the columns visible in this view: the OLD column
// derives a doubletalk flag from BlockPower(e_shadow) < BlockPower(e_main) and
// passes 0.001f or 0.0001f to GetGain; the NEW column instead passes
// aec_state_.SaturatedEcho(), x, y->size() and &high_bands_gain to GetGain,
// and high_bands_gain to ApplyGain. S2_power is declared, passed to
// Estimate() and dumped only in the OLD column, where the call that would fill
// it is commented out.
120 void EchoRemoverImpl::ProcessCapture( | 108 void EchoRemoverImpl::ProcessCapture( |
121 const rtc::Optional<size_t>& echo_path_delay_samples, | 109 const rtc::Optional<size_t>& echo_path_delay_samples, |
122 const EchoPathVariability& echo_path_variability, | 110 const EchoPathVariability& echo_path_variability, |
123 bool capture_signal_saturation, | 111 bool capture_signal_saturation, |
124 const RenderBuffer& render_buffer, | 112 const RenderBuffer& render_buffer, |
125 std::vector<std::vector<float>>* capture) { | 113 std::vector<std::vector<float>>* capture) { |
126 const std::vector<std::vector<float>>& x = render_buffer.MostRecentBlock(); | 114 const std::vector<std::vector<float>>& x = render_buffer.MostRecentBlock(); |
127 std::vector<std::vector<float>>* y = capture; | 115 std::vector<std::vector<float>>* y = capture; |
128 | 116 |
129 RTC_DCHECK(y); | 117 RTC_DCHECK(y); |
130 RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_)); | 118 RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_)); |
131 RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_)); | 119 RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_)); |
132 RTC_DCHECK_EQ(x[0].size(), kBlockSize); | 120 RTC_DCHECK_EQ(x[0].size(), kBlockSize); |
133 RTC_DCHECK_EQ((*y)[0].size(), kBlockSize); | 121 RTC_DCHECK_EQ((*y)[0].size(), kBlockSize); |
134 const std::vector<float>& x0 = x[0]; | 122 const std::vector<float>& x0 = x[0]; |
135 std::vector<float>& y0 = (*y)[0]; | 123 std::vector<float>& y0 = (*y)[0]; |
136 | 124 |
137 data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize, &y0[0], | 125 data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0], |
138 LowestBandRate(sample_rate_hz_), 1); | 126 LowestBandRate(sample_rate_hz_), 1); |
139 data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize, &x0[0], | 127 data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0], |
140 LowestBandRate(sample_rate_hz_), 1); | 128 LowestBandRate(sample_rate_hz_), 1); |
141 | 129 |
142 aec_state_.UpdateCaptureSaturation(capture_signal_saturation); | 130 aec_state_.UpdateCaptureSaturation(capture_signal_saturation); |
143 | 131 |
144 if (echo_path_variability.AudioPathChanged()) { | 132 if (echo_path_variability.AudioPathChanged()) { |
145 subtractor_.HandleEchoPathChange(echo_path_variability); | 133 subtractor_.HandleEchoPathChange(echo_path_variability); |
146 residual_echo_estimator_.HandleEchoPathChange(echo_path_variability); | 134 aec_state_.HandleEchoPathChange(echo_path_variability); |
147 } | 135 } |
148 | 136 |
149 std::array<float, kFftLengthBy2Plus1> Y2; | 137 std::array<float, kFftLengthBy2Plus1> Y2; |
150 std::array<float, kFftLengthBy2Plus1> S2_power; | |
151 std::array<float, kFftLengthBy2Plus1> R2; | 138 std::array<float, kFftLengthBy2Plus1> R2; |
152 std::array<float, kFftLengthBy2Plus1> S2_linear; | 139 std::array<float, kFftLengthBy2Plus1> S2_linear; |
153 std::array<float, kFftLengthBy2Plus1> G; | 140 std::array<float, kFftLengthBy2Plus1> G; |
// NOTE(review): high_bands_gain (NEW column only) is declared without an
// initializer and handed to suppression_gain_.GetGain by pointer; presumably
// GetGain always writes it before ApplyGain reads it - confirm in
// suppression_gain.h.
| 141 float high_bands_gain; |
154 FftData Y; | 142 FftData Y; |
155 FftData comfort_noise; | 143 FftData comfort_noise; |
156 FftData high_band_comfort_noise; | 144 FftData high_band_comfort_noise; |
157 SubtractorOutput subtractor_output; | 145 SubtractorOutput subtractor_output; |
158 FftData& E_main = subtractor_output.E_main; | 146 FftData& E_main = subtractor_output.E_main; |
159 auto& E2_main = subtractor_output.E2_main; | 147 auto& E2_main = subtractor_output.E2_main; |
160 auto& E2_shadow = subtractor_output.E2_shadow; | 148 auto& E2_shadow = subtractor_output.E2_shadow; |
161 auto& e_main = subtractor_output.e_main; | 149 auto& e_main = subtractor_output.e_main; |
162 auto& e_shadow = subtractor_output.e_shadow; | |
163 | 150 |
164 // Analyze the render signal. | 151 // Analyze the render signal. |
165 render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay()); | 152 render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay()); |
166 | 153 |
167 // Perform linear echo cancellation. | 154 // Perform linear echo cancellation. |
168 subtractor_.Process(render_buffer, y0, render_signal_analyzer_, | 155 subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_, |
169 aec_state_.SaturatedCapture(), &subtractor_output); | 156 &subtractor_output); |
170 | 157 |
171 // Compute spectra. | 158 // Compute spectra. |
172 fft_.ZeroPaddedFft(y0, &Y); | 159 fft_.ZeroPaddedFft(y0, &Y); |
173 LinearEchoPower(E_main, Y, &S2_linear); | 160 LinearEchoPower(E_main, Y, &S2_linear); |
174 Y.Spectrum(optimization_, &Y2); | 161 Y.Spectrum(optimization_, &Y2); |
175 | 162 |
176 // Update the AEC state information. | 163 // Update the AEC state information. |
177 aec_state_.Update(subtractor_.FilterFrequencyResponse(), | 164 aec_state_.Update(subtractor_.FilterFrequencyResponse(), |
178 echo_path_delay_samples, render_buffer, E2_main, E2_shadow, | 165 echo_path_delay_samples, render_buffer, E2_main, Y2, x0, |
179 Y2, x0, echo_path_variability, echo_leakage_detected_); | 166 echo_leakage_detected_); |
180 | |
181 // Use the power model to estimate the echo. | |
182 // TODO(peah): Remove in upcoming CL. | |
183 // power_echo_model_.EstimateEcho(render_buffer, Y2, aec_state_, &S2_power); | |
184 | 167 |
185 // Choose the linear output. | 168 // Choose the linear output. |
186 output_selector_.FormLinearOutput(e_main, y0); | 169 output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0); |
187 data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], | 170 data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], |
188 LowestBandRate(sample_rate_hz_), 1); | 171 LowestBandRate(sample_rate_hz_), 1); |
189 const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2; | 172 const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2; |
190 | 173 |
191 // Estimate the residual echo power. | 174 // Estimate the residual echo power. |
192 residual_echo_estimator_.Estimate( | 175 residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(), |
193 output_selector_.UseSubtractorOutput(), aec_state_, render_buffer, | 176 aec_state_, render_buffer, S2_linear, Y2, |
194 subtractor_.FilterFrequencyResponse(), E2_main, E2_shadow, S2_linear, | 177 &R2); |
195 S2_power, Y2, &R2); | |
196 | 178 |
197 // Estimate the comfort noise. | 179 // Estimate the comfort noise. |
198 cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); | 180 cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); |
199 | 181 |
200 // Detect basic doubletalk. | |
201 const bool doubletalk = BlockPower(e_shadow) < BlockPower(e_main); | |
202 | |
203 // A choose and apply echo suppression gain. | 182 // A choose and apply echo suppression gain. |
204 suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), | 183 suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), |
205 doubletalk ? 0.001f : 0.0001f, &G); | 184 aec_state_.SaturatedEcho(), x, y->size(), |
206 suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, y); | 185 &high_bands_gain, &G); |
| 186 suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, |
| 187 high_bands_gain, y); |
207 | 188 |
208 // Update the metrics. | 189 // Update the metrics. |
209 metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G); | 190 metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G); |
210 | 191 |
211 // Debug outputs for the purpose of development and analysis. | 192 // Debug outputs for the purpose of development and analysis. |
212 data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()); | 193 data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()); |
213 data_dumper_->DumpRaw("aec3_suppressor_gain", G); | 194 data_dumper_->DumpRaw("aec3_suppressor_gain", G); |
214 data_dumper_->DumpWav("aec3_output", | 195 data_dumper_->DumpWav("aec3_output", |
215 rtc::ArrayView<const float>(&y0[0], kBlockSize), | 196 rtc::ArrayView<const float>(&y0[0], kBlockSize), |
216 LowestBandRate(sample_rate_hz_), 1); | 197 LowestBandRate(sample_rate_hz_), 1); |
217 data_dumper_->DumpRaw("aec3_using_subtractor_output", | 198 data_dumper_->DumpRaw("aec3_using_subtractor_output", |
218 output_selector_.UseSubtractorOutput() ? 1 : 0); | 199 output_selector_.UseSubtractorOutput() ? 1 : 0); |
219 data_dumper_->DumpRaw("aec3_doubletalk", doubletalk ? 1 : 0); | |
220 data_dumper_->DumpRaw("aec3_E2", E2); | 200 data_dumper_->DumpRaw("aec3_E2", E2); |
221 data_dumper_->DumpRaw("aec3_E2_main", E2_main); | 201 data_dumper_->DumpRaw("aec3_E2_main", E2_main); |
222 data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow); | 202 data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow); |
223 data_dumper_->DumpRaw("aec3_S2_linear", S2_linear); | 203 data_dumper_->DumpRaw("aec3_S2_linear", S2_linear); |
224 data_dumper_->DumpRaw("aec3_S2_power", S2_power); | |
225 data_dumper_->DumpRaw("aec3_Y2", Y2); | 204 data_dumper_->DumpRaw("aec3_Y2", Y2); |
| 205 data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0)); |
226 data_dumper_->DumpRaw("aec3_R2", R2); | 206 data_dumper_->DumpRaw("aec3_R2", R2); |
227 data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle()); | 207 data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle()); |
228 data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); | 208 data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); |
229 data_dumper_->DumpRaw("aec3_reliable_filter_bands", | |
230 aec_state_.BandsWithReliableFilter()); | |
231 data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender()); | 209 data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender()); |
232 data_dumper_->DumpRaw("aec3_model_based_aec_feasible", | |
233 aec_state_.ModelBasedAecFeasible()); | |
234 data_dumper_->DumpRaw("aec3_usable_linear_estimate", | 210 data_dumper_->DumpRaw("aec3_usable_linear_estimate", |
235 aec_state_.UsableLinearEstimate()); | 211 aec_state_.UsableLinearEstimate()); |
// For the two delay dumps below, -1 is written when the corresponding
// optional delay is not set.
236 data_dumper_->DumpRaw( | 212 data_dumper_->DumpRaw( |
237 "aec3_filter_delay", | 213 "aec3_filter_delay", |
238 aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1); | 214 aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1); |
239 data_dumper_->DumpRaw( | 215 data_dumper_->DumpRaw( |
240 "aec3_external_delay", | 216 "aec3_external_delay", |
241 aec_state_.ExternalDelay() ? *aec_state_.ExternalDelay() : -1); | 217 aec_state_.ExternalDelay() ? *aec_state_.ExternalDelay() : -1); |
242 data_dumper_->DumpRaw("aec3_capture_saturation", | 218 data_dumper_->DumpRaw("aec3_capture_saturation", |
243 aec_state_.SaturatedCapture() ? 1 : 0); | 219 aec_state_.SaturatedCapture() ? 1 : 0); |
244 } | 220 } |
245 | 221 |
246 } // namespace | 222 } // namespace |
247 | 223 |
// Factory for the public EchoRemover interface. It is defined outside the
// anonymous namespace so that it can hand out the file-local EchoRemoverImpl.
// sample_rate_hz is forwarded unchanged to the EchoRemoverImpl constructor.
// The returned object is allocated with `new` and nothing in this file keeps
// the pointer, so the caller is responsible for deleting it.
// NOTE(review): presumably callers wrap the result in a smart pointer -
// confirm at the call sites.
248 EchoRemover* EchoRemover::Create(int sample_rate_hz) { | 224 EchoRemover* EchoRemover::Create(int sample_rate_hz) { |
249 return new EchoRemoverImpl(sample_rate_hz); | 225 return new EchoRemoverImpl(sample_rate_hz); |
250 } | 226 } |
251 | 227 |
252 } // namespace webrtc | 228 } // namespace webrtc |
OLD | NEW |