Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" | 11 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" |
| 12 | 12 |
| 13 #include <math.h> | 13 #include <numeric> |
| 14 #include <vector> | 14 #include <vector> |
| 15 | 15 |
| 16 #include "webrtc/base/checks.h" | 16 #include "webrtc/base/checks.h" |
| 17 | 17 |
| 18 namespace webrtc { | 18 namespace webrtc { |
| 19 namespace { | 19 namespace { |
| 20 | 20 |
| 21 constexpr float kSaturationLeakageFactor = 10.f; | 21 // Estimates the echo generating signal power as gated maximal power over a time |
| 22 constexpr size_t kSaturationLeakageBlocks = 10; | 22 // window. |
| 23 constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250; | 23 void EchoGeneratingPower(const RenderBuffer& render_buffer, |
| 24 size_t min_delay, | |
| 25 size_t max_delay, | |
| 26 std::array<float, kFftLengthBy2Plus1>* X2) { | |
| 27 X2->fill(0.f); | |
| 28 for (size_t k = min_delay; k <= max_delay; ++k) { | |
| 29 std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), | |
| 30 X2->begin(), | |
| 31 [](float a, float b) { return std::max(a, b); }); | |
| 32 } | |
| 24 | 33 |
| 25 // Estimates the residual echo power when there is no detection correlation | 34 // Apply soft noise gate of -78 dBFS. |
| 26 // between the render and capture signals. | 35 constexpr float kNoiseGatePower = 27509.42f; |
| 27 void InfiniteErlPowerEstimate( | 36 std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) { |
| 28 size_t active_render_blocks, | 37 if (kNoiseGatePower > a) { |
| 29 size_t blocks_since_last_saturation, | 38 a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); |
|
ivoc
2017/04/05 15:21:25
So this is equal to 1.3*a - 0.3*kNoiseGatePower, r
peah-webrtc
2017/04/06 07:20:32
Yes, but writing it like that is a bit misleading
| |
| 30 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
| 31 std::array<float, kFftLengthBy2Plus1>* R2) { | |
| 32 if (active_render_blocks > 20 * 250) { | |
| 33 // After an amount of active render samples for which an echo should have | |
| 34 // been detected in the capture signal if the ERL was not infinite, set the | |
| 35 // residual echo to 0. | |
| 36 R2->fill(0.f); | |
| 37 } else { | |
| 38 // Before certainty has been reached about the presence of echo, use the | |
| 39 // fallback echo power estimate as the residual echo estimate. Add a leakage | |
| 40 // factor when there is saturation. | |
| 41 std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin()); | |
| 42 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | |
| 43 std::for_each(R2->begin(), R2->end(), | |
| 44 [](float& a) { a *= kSaturationLeakageFactor; }); | |
| 45 } | 39 } |
| 46 } | 40 }); |
| 47 } | 41 } |
| 48 | 42 |
| 49 // Estimates the echo power in a half-duplex manner. | 43 // Estimates the residual echo power based on the erle and the linear power |
| 50 void HalfDuplexPowerEstimate(bool active_render, | 44 // estimate. |
| 51 const std::array<float, kFftLengthBy2Plus1>& Y2, | 45 void LinearResidualPowerEstimate( |
| 52 std::array<float, kFftLengthBy2Plus1>* R2) { | 46 const std::array<float, kFftLengthBy2Plus1>& S2_linear, |
| 53 // Set the residual echo power to the power of the capture signal. | 47 const std::array<float, kFftLengthBy2Plus1>& erle, |
| 54 if (active_render) { | 48 std::array<int, kFftLengthBy2Plus1>* R2_hold_counter, |
| 55 std::copy(Y2.begin(), Y2.end(), R2->begin()); | 49 std::array<float, kFftLengthBy2Plus1>* R2) { |
| 56 } else { | 50 std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f); |
| 57 R2->fill(0.f); | 51 std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), |
| 58 } | 52 [](float a, float b) { |
| 53 RTC_DCHECK_LT(0.f, a); | |
| 54 return b / a; | |
| 55 }); | |
| 59 } | 56 } |
| 60 | 57 |
| 61 // Estimates the residual echo power based on gains. | 58 // Estimates the residual echo power based on the estimate of the echo path |
| 62 void GainBasedPowerEstimate( | 59 // gain. |
| 63 size_t external_delay, | 60 void NonLinearResidualPowerEstimate( |
| 64 const RenderBuffer& X_buffer, | 61 const std::array<float, kFftLengthBy2Plus1>& X2, |
| 65 size_t blocks_since_last_saturation, | 62 const std::array<float, kFftLengthBy2Plus1>& Y2, |
| 66 size_t active_render_blocks, | 63 const std::array<float, kFftLengthBy2Plus1>& R2_old, |
| 67 const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter, | 64 std::array<int, kFftLengthBy2Plus1>* R2_hold_counter, |
| 68 const std::array<float, kFftLengthBy2Plus1>& echo_path_gain, | |
| 69 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
| 70 std::array<float, kFftLengthBy2Plus1>* R2) { | 65 std::array<float, kFftLengthBy2Plus1>* R2) { |
| 71 const auto& X2 = X_buffer.Spectrum(external_delay); | 66 // Compute preliminary residual echo. |
| 67 // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to | |
| 68 // 20 dB. | |
| 69 std::transform(X2.begin(), X2.end(), R2->begin(), | |
| 70 [](float a) { return a * kFixedEchoPathGain; }); | |
| 72 | 71 |
| 73 // Base the residual echo power on gain of the linear echo path estimate if | 72 for (size_t k = 0; k < R2->size(); ++k) { |
| 74 // that is reliable, otherwise use the fallback echo path estimate. Add a | 73 // Update hold counter. |
| 75 // leakage factor when there is saturation. | 74 (*R2_hold_counter)[k] = |
| 76 if (active_render_blocks > kEchoPathChangeConvergenceBlocks) { | 75 R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1; |
| 77 for (size_t k = 0; k < R2->size(); ++k) { | |
| 78 (*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k] | |
| 79 : S2_fallback[k]; | |
| 80 } | |
| 81 } else { | |
| 82 for (size_t k = 0; k < R2->size(); ++k) { | |
| 83 (*R2)[k] = S2_fallback[k]; | |
| 84 } | |
| 85 } | |
| 86 | 76 |
| 87 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | 77 // Compute the residual echo by holding the maximum echo power and an echo |
| 88 std::for_each(R2->begin(), R2->end(), | 78 // fading corresponding to a room with an RT60 value of about 50 ms. |
| 89 [](float& a) { a *= kSaturationLeakageFactor; }); | 79 (*R2)[k] = (*R2_hold_counter)[k] < 2 |
| 90 } | 80 ? std::max((*R2)[k], R2_old[k]) |
| 91 } | 81 : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]); |
| 92 | |
| 93 // Estimates the residual echo power based on the linear echo path. | |
| 94 void ErleBasedPowerEstimate( | |
| 95 bool headset_detected, | |
| 96 const RenderBuffer& X_buffer, | |
| 97 bool using_subtractor_output, | |
| 98 size_t linear_filter_based_delay, | |
| 99 size_t blocks_since_last_saturation, | |
| 100 bool poorly_aligned_filter, | |
| 101 const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter, | |
| 102 const std::array<float, kFftLengthBy2Plus1>& echo_path_gain, | |
| 103 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
| 104 const std::array<float, kFftLengthBy2Plus1>& S2_linear, | |
| 105 const std::array<float, kFftLengthBy2Plus1>& Y2, | |
| 106 const std::array<float, kFftLengthBy2Plus1>& erle, | |
| 107 const std::array<float, kFftLengthBy2Plus1>& erl, | |
| 108 std::array<float, kFftLengthBy2Plus1>* R2) { | |
| 109 // Residual echo power after saturation. | |
| 110 if (blocks_since_last_saturation < kSaturationLeakageBlocks) { | |
| 111 for (size_t k = 0; k < R2->size(); ++k) { | |
| 112 (*R2)[k] = kSaturationLeakageFactor * | |
| 113 (bands_with_reliable_filter[k] && using_subtractor_output | |
| 114 ? S2_linear[k] | |
| 115 : std::min(S2_fallback[k], Y2[k])); | |
| 116 } | |
| 117 return; | |
| 118 } | |
| 119 | |
| 120 // Residual echo power when a headset is used. | |
| 121 if (headset_detected) { | |
| 122 const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); | |
| 123 for (size_t k = 0; k < R2->size(); ++k) { | |
| 124 RTC_DCHECK_LT(0.f, erle[k]); | |
| 125 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
| 126 ? S2_linear[k] / erle[k] | |
| 127 : std::min(S2_fallback[k], Y2[k]); | |
| 128 (*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]); | |
| 129 } | |
| 130 return; | |
| 131 } | |
| 132 | |
| 133 // Residual echo power when the adaptive filter is poorly aligned. | |
| 134 if (poorly_aligned_filter) { | |
| 135 for (size_t k = 0; k < R2->size(); ++k) { | |
| 136 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
| 137 ? S2_linear[k] | |
| 138 : std::min(S2_fallback[k], Y2[k]); | |
| 139 } | |
| 140 return; | |
| 141 } | |
| 142 | |
| 143 // Residual echo power when there is no recent saturation, no headset detected | |
| 144 // and when the adaptive filter is well aligned. | |
| 145 for (size_t k = 0; k < R2->size(); ++k) { | |
| 146 RTC_DCHECK_LT(0.f, erle[k]); | |
| 147 const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); | |
| 148 (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output | |
| 149 ? S2_linear[k] / erle[k] | |
| 150 : std::min(echo_path_gain[k] * X2[k], Y2[k]); | |
| 151 } | 82 } |
| 152 } | 83 } |
| 153 | 84 |
| 154 } // namespace | 85 } // namespace |
| 155 | 86 |
| 156 ResidualEchoEstimator::ResidualEchoEstimator() { | 87 ResidualEchoEstimator::ResidualEchoEstimator() { |
| 157 echo_path_gain_.fill(100.f); | 88 R2_old_.fill(0.f); |
| 89 R2_hold_counter_.fill(0); | |
| 158 } | 90 } |
| 159 | 91 |
| 160 ResidualEchoEstimator::~ResidualEchoEstimator() = default; | 92 ResidualEchoEstimator::~ResidualEchoEstimator() = default; |
| 161 | 93 |
| 162 void ResidualEchoEstimator::Estimate( | 94 void ResidualEchoEstimator::Estimate( |
| 163 bool using_subtractor_output, | 95 bool using_subtractor_output, |
| 164 const AecState& aec_state, | 96 const AecState& aec_state, |
| 165 const RenderBuffer& X_buffer, | 97 const RenderBuffer& render_buffer, |
| 166 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, | |
| 167 const std::array<float, kFftLengthBy2Plus1>& E2_main, | |
| 168 const std::array<float, kFftLengthBy2Plus1>& E2_shadow, | |
| 169 const std::array<float, kFftLengthBy2Plus1>& S2_linear, | 98 const std::array<float, kFftLengthBy2Plus1>& S2_linear, |
| 170 const std::array<float, kFftLengthBy2Plus1>& S2_fallback, | |
| 171 const std::array<float, kFftLengthBy2Plus1>& Y2, | 99 const std::array<float, kFftLengthBy2Plus1>& Y2, |
| 172 std::array<float, kFftLengthBy2Plus1>* R2) { | 100 std::array<float, kFftLengthBy2Plus1>* R2) { |
| 173 RTC_DCHECK(R2); | 101 RTC_DCHECK(R2); |
| 174 const rtc::Optional<size_t>& linear_filter_based_delay = | |
| 175 aec_state.FilterDelay(); | |
| 176 | 102 |
| 177 // Update the echo path gain. | 103 // Return zero residual echo power when a headset is detected. |
| 178 if (linear_filter_based_delay) { | 104 if (aec_state.HeadsetDetected()) { |
| 179 std::copy(H2[*linear_filter_based_delay].begin(), | 105 R2->fill(0.f); |
| 180 H2[*linear_filter_based_delay].end(), echo_path_gain_.begin()); | 106 R2_old_.fill(0.f); |
| 181 constexpr float kEchoPathGainHeadroom = 10.f; | 107 R2_hold_counter_.fill(0.f); |
| 182 std::for_each( | 108 return; |
| 183 echo_path_gain_.begin(), echo_path_gain_.end(), | |
| 184 [kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; }); | |
| 185 } | 109 } |
| 186 | 110 |
| 187 // Counts the blocks since saturation. | 111 // Estimate the echo generating signal power. |
| 188 if (aec_state.SaturatedCapture()) { | 112 std::array<float, kFftLengthBy2Plus1> X2; |
| 189 blocks_since_last_saturation_ = 0; | 113 if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { |
| 114 const int delay = | |
| 115 static_cast<int>(aec_state.FilterDelay() ? *aec_state.FilterDelay() | |
| 116 : *aec_state.ExternalDelay()); | |
| 117 // Computes the spectral power over the blocks surrounding the delay. | |
| 118 EchoGeneratingPower( | |
| 119 render_buffer, std::max(0, delay - 1), | |
| 120 std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2); | |
| 190 } else { | 121 } else { |
| 191 ++blocks_since_last_saturation_; | 122 // Computes the spectral power over the last 30 blocks. |
| 123 EchoGeneratingPower(render_buffer, 0, | |
| 124 kResidualEchoPowerRenderWindowSize - 1, &X2); | |
| 192 } | 125 } |
| 193 | 126 |
| 194 const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter(); | 127 // Estimate the residual echo power. |
| 128 if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) { | |
| 129 LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_, | |
| 130 R2); | |
| 131 } else { | |
| 132 NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2); | |
| 133 } | |
| 195 | 134 |
| 196 if (aec_state.UsableLinearEstimate()) { | 135 // If the echo is saturated, estimate the echo power as the maximum echo power |
| 197 // Residual echo power estimation when the adaptive filter is reliable. | 136 // with a leakage factor. |
| 198 RTC_DCHECK(linear_filter_based_delay); | 137 if (aec_state.SaturatedEcho()) { |
| 199 ErleBasedPowerEstimate( | 138 constexpr float kSaturationLeakageFactor = 100.f; |
| 200 aec_state.HeadsetDetected(), X_buffer, using_subtractor_output, | 139 R2->fill((*std::max_element(R2->begin(), R2->end())) * |
| 201 *linear_filter_based_delay, blocks_since_last_saturation_, | 140 kSaturationLeakageFactor); |
| 202 aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter, | |
| 203 echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(), | |
| 204 aec_state.Erl(), R2); | |
| 205 } else if (aec_state.ModelBasedAecFeasible()) { | |
| 206 // Residual echo power when the adaptive filter is not reliable but still an | |
| 207 // external echo path delay is provided (and hence can be estimated). | |
| 208 RTC_DCHECK(aec_state.ExternalDelay()); | |
| 209 GainBasedPowerEstimate( | |
| 210 *aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_, | |
| 211 aec_state.ActiveRenderBlocks(), bands_with_reliable_filter, | |
| 212 echo_path_gain_, S2_fallback, R2); | |
| 213 } else if (aec_state.EchoLeakageDetected()) { | |
| 214 // Residual echo power when an external residual echo detection algorithm | |
| 215 // has deemed the echo canceller to leak echoes. | |
| 216 HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2); | |
| 217 } else { | |
| 218 // Residual echo power when none of the other cases are fulfilled. | |
| 219 InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(), | |
| 220 blocks_since_last_saturation_, S2_fallback, R2); | |
| 221 } | 141 } |
| 222 } | |
| 223 | 142 |
| 224 void ResidualEchoEstimator::HandleEchoPathChange( | 143 std::copy(R2->begin(), R2->end(), R2_old_.begin()); |
| 225 const EchoPathVariability& echo_path_variability) { | |
| 226 if (echo_path_variability.AudioPathChanged()) { | |
| 227 blocks_since_last_saturation_ = 0; | |
| 228 echo_path_gain_.fill(100.f); | |
| 229 } | |
| 230 } | 144 } |
| 231 | 145 |
| 232 } // namespace webrtc | 146 } // namespace webrtc |
| OLD | NEW |