OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" | 11 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" |
12 | 12 |
13 #include <math.h> | 13 #include <numeric> |
14 #include <vector> | 14 #include <vector> |
15 | 15 |
16 #include "webrtc/base/checks.h" | 16 #include "webrtc/base/checks.h" |
17 | 17 |
18 namespace webrtc { | 18 namespace webrtc { |
19 namespace { | 19 namespace { |
20 | 20 |
21 constexpr float kSaturationLeakageFactor = 10.f; | 21 // Estimates the echo generating signal power as gated maximal power over a time |
22 constexpr size_t kSaturationLeakageBlocks = 10; | 22 // window. |
23 constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250; | 23 void EchoGeneratingPower(const RenderBuffer& render_buffer, |
24 size_t min_delay, | |
25 size_t max_delay, | |
26 std::array<float, kFftLengthBy2Plus1>* X2) { | |
27 X2->fill(0.f); | |
28 for (size_t k = min_delay; k <= max_delay; ++k) { | |
29 std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), | |
30 X2->begin(), | |
31 [](float a, float b) { return std::max(a, b); }); | |
32 } | |
24 | 33 |
25 // Estimates the residual echo power when there is no detection correlation | 34 // Apply soft noise gate of -78 dBFS. |
26 // between the render and capture signals. | 35 constexpr float kNoiseGatePower = 27509.42f; |
27 void InfiniteErlPowerEstimate( | 36 std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) { |
28 size_t active_render_blocks, | 37 if (kNoiseGatePower > a) { |
29 size_t blocks_since_last_saturation, | 38 a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); |
ivoc
2017/04/05 15:21:25
So this is equal to 1.3*a - 0.3*kNoiseGatePower, r
peah-webrtc
2017/04/06 07:20:32
Yes, but writing it like that is a bit misleading
| |
30 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
31 std::array<float, kFftLengthBy2Plus1>* R2) { | |
32 if (active_render_blocks > 20 * 250) { | |
33 // After an amount of active render samples for which an echo should have | |
34 // been detected in the capture signal if the ERL was not infinite, set the | |
35 // residual echo to 0. | |
36 R2->fill(0.f); | |
37 } else { | |
38 // Before certainty has been reached about the presence of echo, use the | |
39 // fallback echo power estimate as the residual echo estimate. Add a leakage | |
40 // factor when there is saturation. | |
41 std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin()); | |
42 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | |
43 std::for_each(R2->begin(), R2->end(), | |
44 [](float& a) { a *= kSaturationLeakageFactor; }); | |
45 } | 39 } |
46 } | 40 }); |
47 } | 41 } |
48 | 42 |
49 // Estimates the echo power in a half-duplex manner. | 43 // Estimates the residual echo power based on the erle and the linear power |
50 void HalfDuplexPowerEstimate(bool active_render, | 44 // estimate. |
51 const std::array<float, kFftLengthBy2Plus1>& Y2, | 45 void LinearResidualPowerEstimate( |
52 std::array<float, kFftLengthBy2Plus1>* R2) { | 46 const std::array<float, kFftLengthBy2Plus1>& S2_linear, |
53 // Set the residual echo power to the power of the capture signal. | 47 const std::array<float, kFftLengthBy2Plus1>& erle, |
54 if (active_render) { | 48 std::array<int, kFftLengthBy2Plus1>* R2_hold_counter, |
55 std::copy(Y2.begin(), Y2.end(), R2->begin()); | 49 std::array<float, kFftLengthBy2Plus1>* R2) { |
56 } else { | 50 std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f); |
57 R2->fill(0.f); | 51 std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), |
58 } | 52 [](float a, float b) { |
53 RTC_DCHECK_LT(0.f, a); | |
54 return b / a; | |
55 }); | |
59 } | 56 } |
60 | 57 |
61 // Estimates the residual echo power based on gains. | 58 // Estimates the residual echo power based on the estimate of the echo path |
62 void GainBasedPowerEstimate( | 59 // gain. |
63 size_t external_delay, | 60 void NonLinearResidualPowerEstimate( |
64 const RenderBuffer& X_buffer, | 61 const std::array<float, kFftLengthBy2Plus1>& X2, |
65 size_t blocks_since_last_saturation, | 62 const std::array<float, kFftLengthBy2Plus1>& Y2, |
66 size_t active_render_blocks, | 63 const std::array<float, kFftLengthBy2Plus1>& R2_old, |
67 const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter, | 64 std::array<int, kFftLengthBy2Plus1>* R2_hold_counter, |
68 const std::array<float, kFftLengthBy2Plus1>& echo_path_gain, | |
69 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
70 std::array<float, kFftLengthBy2Plus1>* R2) { | 65 std::array<float, kFftLengthBy2Plus1>* R2) { |
71 const auto& X2 = X_buffer.Spectrum(external_delay); | 66 // Compute preliminary residual echo. |
67 // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to | |
68 // 20 dB. | |
69 std::transform(X2.begin(), X2.end(), R2->begin(), | |
70 [](float a) { return a * kFixedEchoPathGain; }); | |
72 | 71 |
73 // Base the residual echo power on gain of the linear echo path estimate if | 72 for (size_t k = 0; k < R2->size(); ++k) { |
74 // that is reliable, otherwise use the fallback echo path estimate. Add a | 73 // Update hold counter. |
75 // leakage factor when there is saturation. | 74 (*R2_hold_counter)[k] = |
76 if (active_render_blocks > kEchoPathChangeConvergenceBlocks) { | 75 R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1; |
77 for (size_t k = 0; k < R2->size(); ++k) { | |
78 (*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k] | |
79 : S2_fallback[k]; | |
80 } | |
81 } else { | |
82 for (size_t k = 0; k < R2->size(); ++k) { | |
83 (*R2)[k] = S2_fallback[k]; | |
84 } | |
85 } | |
86 | 76 |
87 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | 77 // Compute the residual echo by holding the maximum echo power and an echo |
88 std::for_each(R2->begin(), R2->end(), | 78 // fading corresponding to a room with an RT60 value of about 50 ms. |
89 [](float& a) { a *= kSaturationLeakageFactor; }); | 79 (*R2)[k] = (*R2_hold_counter)[k] < 2 |
90 } | 80 ? std::max((*R2)[k], R2_old[k]) |
91 } | 81 : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]); |
92 | |
93 // Estimates the residual echo power based on the linear echo path. | |
94 void ErleBasedPowerEstimate( | |
95 bool headset_detected, | |
96 const RenderBuffer& X_buffer, | |
97 bool using_subtractor_output, | |
98 size_t linear_filter_based_delay, | |
99 size_t blocks_since_last_saturation, | |
100 bool poorly_aligned_filter, | |
101 const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter, | |
102 const std::array<float, kFftLengthBy2Plus1>& echo_path_gain, | |
103 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
104 const std::array<float, kFftLengthBy2Plus1>& S2_linear, | |
105 const std::array<float, kFftLengthBy2Plus1>& Y2, | |
106 const std::array<float, kFftLengthBy2Plus1>& erle, | |
107 const std::array<float, kFftLengthBy2Plus1>& erl, | |
108 std::array<float, kFftLengthBy2Plus1>* R2) { | |
109 // Residual echo power after saturation. | |
110 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | |
111 for (size_t k = 0; k < R2->size(); ++k) { | |
112 (*R2)[k] = kSaturationLeakageFactor * | |
113 (bands_with_reliable_filter[k] && using_subtractor_output | |
114 ? S2_linear[k] | |
115 : std::min(S2_fallback[k], Y2[k])); | |
116 } | |
117 return; | |
118 } | |
119 | |
120 // Residual echo power when a headset is used. | |
121 if (headset_detected) { | |
122 const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); | |
123 for (size_t k = 0; k < R2->size(); ++k) { | |
124 RTC_DCHECK_LT(0.f, erle[k]); | |
125 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
126 ? S2_linear[k] / erle[k] | |
127 : std::min(S2_fallback[k], Y2[k]); | |
128 (*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]); | |
129 } | |
130 return; | |
131 } | |
132 | |
133 // Residual echo power when the adaptive filter is poorly aligned. | |
134 if (poorly_aligned_filter) { | |
135 for (size_t k = 0; k < R2->size(); ++k) { | |
136 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
137 ? S2_linear[k] | |
138 : std::min(S2_fallback[k], Y2[k]); | |
139 } | |
140 return; | |
141 } | |
142 | |
143 // Residual echo power when there is no recent saturation, no headset detected | |
144 // and when the adaptive filter is well aligned. | |
145 for (size_t k = 0; k < R2->size(); ++k) { | |
146 RTC_DCHECK_LT(0.f, erle[k]); | |
147 const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); | |
148 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
149 ? S2_linear[k] / erle[k] | |
150 : std::min(echo_path_gain[k] * X2[k], Y2[k]); | |
151 } | 82 } |
152 } | 83 } |
153 | 84 |
154 } // namespace | 85 } // namespace |
155 | 86 |
156 ResidualEchoEstimator::ResidualEchoEstimator() { | 87 ResidualEchoEstimator::ResidualEchoEstimator() { |
157 echo_path_gain_.fill(100.f); | 88 R2_old_.fill(0.f); |
89 R2_hold_counter_.fill(0); | |
158 } | 90 } |
159 | 91 |
160 ResidualEchoEstimator::~ResidualEchoEstimator() = default; | 92 ResidualEchoEstimator::~ResidualEchoEstimator() = default; |
161 | 93 |
162 void ResidualEchoEstimator::Estimate( | 94 void ResidualEchoEstimator::Estimate( |
163 bool using_subtractor_output, | 95 bool using_subtractor_output, |
164 const AecState& aec_state, | 96 const AecState& aec_state, |
165 const RenderBuffer& X_buffer, | 97 const RenderBuffer& render_buffer, |
166 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, | |
167 const std::array<float, kFftLengthBy2Plus1>& E2_main, | |
168 const std::array<float, kFftLengthBy2Plus1>& E2_shadow, | |
169 const std::array<float, kFftLengthBy2Plus1>& S2_linear, | 98 const std::array<float, kFftLengthBy2Plus1>& S2_linear, |
170 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
171 const std::array<float, kFftLengthBy2Plus1>& Y2, | 99 const std::array<float, kFftLengthBy2Plus1>& Y2, |
172 std::array<float, kFftLengthBy2Plus1>* R2) { | 100 std::array<float, kFftLengthBy2Plus1>* R2) { |
173 RTC_DCHECK(R2); | 101 RTC_DCHECK(R2); |
174 const rtc::Optional<size_t>& linear_filter_based_delay = | |
175 aec_state.FilterDelay(); | |
176 | 102 |
177 // Update the echo path gain. | 103 // Return zero residual echo power when a headset is detected. |
178 if (linear_filter_based_delay) { | 104 if (aec_state.HeadsetDetected()) { |
179 std::copy(H2[*linear_filter_based_delay].begin(), | 105 R2->fill(0.f); |
180 H2[*linear_filter_based_delay].end(), echo_path_gain_.begin()); | 106 R2_old_.fill(0.f); |
181 constexpr float kEchoPathGainHeadroom = 10.f; | 107 R2_hold_counter_.fill(0.f); |
182 std::for_each( | 108 return; |
183 echo_path_gain_.begin(), echo_path_gain_.end(), | |
184 [kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; }); | |
185 } | 109 } |
186 | 110 |
187 // Counts the blocks since saturation. | 111 // Estimate the echo generating signal power. |
188 if (aec_state.SaturatedCapture()) { | 112 std::array<float, kFftLengthBy2Plus1> X2; |
189 blocks_since_last_saturation_ = 0; | 113 if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { |
114 const int delay = | |
115 static_cast<int>(aec_state.FilterDelay() ? *aec_state.FilterDelay() | |
116 : *aec_state.ExternalDelay()); | |
117 // Computes the spectral power over the blocks surrounding the delay. | |
118 EchoGeneratingPower( | |
119 render_buffer, std::max(0, delay - 1), | |
120 std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2); | |
190 } else { | 121 } else { |
191 ++blocks_since_last_saturation_; | 122 // Computes the spectral power over the last 30 blocks. |
123 EchoGeneratingPower(render_buffer, 0, | |
124 kResidualEchoPowerRenderWindowSize - 1, &X2); | |
192 } | 125 } |
193 | 126 |
194 const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter(); | 127 // Estimate the residual echo power. |
128 if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) { | |
129 LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_, | |
130 R2); | |
131 } else { | |
132 NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2); | |
133 } | |
195 | 134 |
196 if (aec_state.UsableLinearEstimate()) { | 135 // If the echo is saturated, estimate the echo power as the maximum echo power |
197 // Residual echo power estimation when the adaptive filter is reliable. | 136 // with a leakage factor. |
198 RTC_DCHECK(linear_filter_based_delay); | 137 if (aec_state.SaturatedEcho()) { |
199 ErleBasedPowerEstimate( | 138 constexpr float kSaturationLeakageFactor = 100.f; |
200 aec_state.HeadsetDetected(), X_buffer, using_subtractor_output, | 139 R2->fill((*std::max_element(R2->begin(), R2->end())) * |
201 *linear_filter_based_delay, blocks_since_last_saturation_, | 140 kSaturationLeakageFactor); |
202 aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter, | |
203 echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(), | |
204 aec_state.Erl(), R2); | |
205 } else if (aec_state.ModelBasedAecFeasible()) { | |
206 // Residual echo power when the adaptive filter is not reliable but still an | |
207 // external echo path delay is provided (and hence can be estimated). | |
208 RTC_DCHECK(aec_state.ExternalDelay()); | |
209 GainBasedPowerEstimate( | |
210 *aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_, | |
211 aec_state.ActiveRenderBlocks(), bands_with_reliable_filter, | |
212 echo_path_gain_, S2_fallback, R2); | |
213 } else if (aec_state.EchoLeakageDetected()) { | |
214 // Residual echo power when an external residual echo detection algorithm | |
215 // has deemed the echo canceller to leak echoes. | |
216 HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2); | |
217 } else { | |
218 // Residual echo power when none of the other cases are fulfilled. | |
219 InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(), | |
220 blocks_since_last_saturation_, S2_fallback, R2); | |
221 } | 141 } |
222 } | |
223 | 142 |
224 void ResidualEchoEstimator::HandleEchoPathChange( | 143 std::copy(R2->begin(), R2->end(), R2_old_.begin()); |
225 const EchoPathVariability& echo_path_variability) { | |
226 if (echo_path_variability.AudioPathChanged()) { | |
227 blocks_since_last_saturation_ = 0; | |
228 echo_path_gain_.fill(100.f); | |
229 } | |
230 } | 144 } |
231 | 145 |
232 } // namespace webrtc | 146 } // namespace webrtc |
OLD | NEW |