| Index: webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
|
| diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
|
| index 993a8da8bdef3d7a080a6c5b3433f04280fc2c96..fd848d30af756bfbacb13dce5ec687060256e9eb 100644
|
| --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
|
| +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
|
| @@ -10,7 +10,7 @@
|
|
|
| #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
|
|
|
| -#include <math.h>
|
| +#include <numeric>
|
| #include <vector>
|
|
|
| #include "webrtc/base/checks.h"
|
| @@ -18,143 +18,75 @@
|
| namespace webrtc {
|
| namespace {
|
|
|
| -constexpr float kSaturationLeakageFactor = 10.f;
|
| -constexpr size_t kSaturationLeakageBlocks = 10;
|
| -constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250;
|
| -
|
| -// Estimates the residual echo power when there is no detection correlation
|
| -// between the render and capture signals.
|
| -void InfiniteErlPowerEstimate(
|
| - size_t active_render_blocks,
|
| - size_t blocks_since_last_saturation,
|
| - const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
|
| - std::array<float, kFftLengthBy2Plus1>* R2) {
|
| - if (active_render_blocks > 20 * 250) {
|
| - // After an amount of active render samples for which an echo should have
|
| - // been detected in the capture signal if the ERL was not infinite, set the
|
| - // residual echo to 0.
|
| - R2->fill(0.f);
|
| - } else {
|
| - // Before certainty has been reached about the presence of echo, use the
|
| - // fallback echo power estimate as the residual echo estimate. Add a leakage
|
| - // factor when there is saturation.
|
| - std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin());
|
| - if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
|
| - std::for_each(R2->begin(), R2->end(),
|
| - [](float& a) { a *= kSaturationLeakageFactor; });
|
| - }
|
| +// Estimates the echo generating signal power as gated maximal power over a time
|
| +// window.
|
| +void EchoGeneratingPower(const RenderBuffer& render_buffer,
|
| + size_t min_delay,
|
| + size_t max_delay,
|
| + std::array<float, kFftLengthBy2Plus1>* X2) {
|
| + X2->fill(0.f);
|
| + for (size_t k = min_delay; k <= max_delay; ++k) {
|
| + std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
|
| + X2->begin(),
|
| + [](float a, float b) { return std::max(a, b); });
|
| }
|
| -}
|
|
|
| -// Estimates the echo power in an half-duplex manner.
|
| -void HalfDuplexPowerEstimate(bool active_render,
|
| - const std::array<float, kFftLengthBy2Plus1>& Y2,
|
| - std::array<float, kFftLengthBy2Plus1>* R2) {
|
| - // Set the residual echo power to the power of the capture signal.
|
| - if (active_render) {
|
| - std::copy(Y2.begin(), Y2.end(), R2->begin());
|
| - } else {
|
| - R2->fill(0.f);
|
| - }
|
| -}
|
| -
|
| -// Estimates the residual echo power based on gains.
|
| -void GainBasedPowerEstimate(
|
| - size_t external_delay,
|
| - const RenderBuffer& X_buffer,
|
| - size_t blocks_since_last_saturation,
|
| - size_t active_render_blocks,
|
| - const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
|
| - const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
|
| - const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
|
| - std::array<float, kFftLengthBy2Plus1>* R2) {
|
| - const auto& X2 = X_buffer.Spectrum(external_delay);
|
| -
|
| - // Base the residual echo power on gain of the linear echo path estimate if
|
| - // that is reliable, otherwise use the fallback echo path estimate. Add a
|
| - // leakage factor when there is saturation.
|
| - if (active_render_blocks > kEchoPathChangeConvergenceBlocks) {
|
| - for (size_t k = 0; k < R2->size(); ++k) {
|
| - (*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k]
|
| - : S2_fallback[k];
|
| + // Apply soft noise gate of -78 dBFS.
|
| + constexpr float kNoiseGatePower = 27509.42f;
|
| + std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) {
|
| + if (kNoiseGatePower > a) {
|
| + a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
|
| }
|
| - } else {
|
| - for (size_t k = 0; k < R2->size(); ++k) {
|
| - (*R2)[k] = S2_fallback[k];
|
| - }
|
| - }
|
| -
|
| - if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
|
| - std::for_each(R2->begin(), R2->end(),
|
| - [](float& a) { a *= kSaturationLeakageFactor; });
|
| - }
|
| + });
|
| }
|
|
|
| -// Estimates the residual echo power based on the linear echo path.
|
| -void ErleBasedPowerEstimate(
|
| - bool headset_detected,
|
| - const RenderBuffer& X_buffer,
|
| - bool using_subtractor_output,
|
| - size_t linear_filter_based_delay,
|
| - size_t blocks_since_last_saturation,
|
| - bool poorly_aligned_filter,
|
| - const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
|
| - const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
|
| - const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
|
| +// Estimates the residual echo power based on the erle and the linear power
|
| +// estimate.
|
| +void LinearResidualPowerEstimate(
|
| const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
| - const std::array<float, kFftLengthBy2Plus1>& Y2,
|
| const std::array<float, kFftLengthBy2Plus1>& erle,
|
| - const std::array<float, kFftLengthBy2Plus1>& erl,
|
| + std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
|
| std::array<float, kFftLengthBy2Plus1>* R2) {
|
| - // Residual echo power after saturation.
|
| - if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
|
| - for (size_t k = 0; k < R2->size(); ++k) {
|
| - (*R2)[k] = kSaturationLeakageFactor *
|
| - (bands_with_reliable_filter[k] && using_subtractor_output
|
| - ? S2_linear[k]
|
| - : std::min(S2_fallback[k], Y2[k]));
|
| - }
|
| - return;
|
| - }
|
| -
|
| - // Residual echo power when a headset is used.
|
| - if (headset_detected) {
|
| - const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
|
| - for (size_t k = 0; k < R2->size(); ++k) {
|
| - RTC_DCHECK_LT(0.f, erle[k]);
|
| - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
|
| - ? S2_linear[k] / erle[k]
|
| - : std::min(S2_fallback[k], Y2[k]);
|
| - (*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]);
|
| - }
|
| - return;
|
| - }
|
| + std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f);
|
| + std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
|
| + [](float a, float b) {
|
| + RTC_DCHECK_LT(0.f, a);
|
| + return b / a;
|
| + });
|
| +}
|
|
|
| - // Residual echo power when the adaptive filter is poorly aligned.
|
| - if (poorly_aligned_filter) {
|
| - for (size_t k = 0; k < R2->size(); ++k) {
|
| - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
|
| - ? S2_linear[k]
|
| - : std::min(S2_fallback[k], Y2[k]);
|
| - }
|
| - return;
|
| - }
|
| +// Estimates the residual echo power based on the estimate of the echo path
|
| +// gain.
|
| +void NonLinearResidualPowerEstimate(
|
| + const std::array<float, kFftLengthBy2Plus1>& X2,
|
| + const std::array<float, kFftLengthBy2Plus1>& Y2,
|
| + const std::array<float, kFftLengthBy2Plus1>& R2_old,
|
| + std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
|
| + std::array<float, kFftLengthBy2Plus1>* R2) {
|
| + // Compute preliminary residual echo.
|
| + // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
|
| + // 20 dB.
|
| + std::transform(X2.begin(), X2.end(), R2->begin(),
|
| + [](float a) { return a * kFixedEchoPathGain; });
|
|
|
| - // Residual echo power when there is no recent saturation, no headset detected
|
| - // and when the adaptive filter is well aligned.
|
| for (size_t k = 0; k < R2->size(); ++k) {
|
| - RTC_DCHECK_LT(0.f, erle[k]);
|
| - const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
|
| - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
|
| - ? S2_linear[k] / erle[k]
|
| - : std::min(echo_path_gain[k] * X2[k], Y2[k]);
|
| + // Update hold counter.
|
| + (*R2_hold_counter)[k] =
|
| + R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1;
|
| +
|
| + // Compute the residual echo by holding a maximum echo powers and an echo
|
| + // fading corresponding to a room with an RT60 value of about 50 ms.
|
| + (*R2)[k] = (*R2_hold_counter)[k] < 2
|
| + ? std::max((*R2)[k], R2_old[k])
|
| + : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]);
|
| }
|
| }
|
|
|
| } // namespace
|
|
|
| ResidualEchoEstimator::ResidualEchoEstimator() {
|
| - echo_path_gain_.fill(100.f);
|
| + R2_old_.fill(0.f);
|
| + R2_hold_counter_.fill(0);
|
| }
|
|
|
| ResidualEchoEstimator::~ResidualEchoEstimator() = default;
|
| @@ -162,71 +94,53 @@ ResidualEchoEstimator::~ResidualEchoEstimator() = default;
|
| void ResidualEchoEstimator::Estimate(
|
| bool using_subtractor_output,
|
| const AecState& aec_state,
|
| - const RenderBuffer& X_buffer,
|
| - const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
| - const std::array<float, kFftLengthBy2Plus1>& E2_main,
|
| - const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
|
| + const RenderBuffer& render_buffer,
|
| const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
| - const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
|
| const std::array<float, kFftLengthBy2Plus1>& Y2,
|
| std::array<float, kFftLengthBy2Plus1>* R2) {
|
| RTC_DCHECK(R2);
|
| - const rtc::Optional<size_t>& linear_filter_based_delay =
|
| - aec_state.FilterDelay();
|
| -
|
| - // Update the echo path gain.
|
| - if (linear_filter_based_delay) {
|
| - std::copy(H2[*linear_filter_based_delay].begin(),
|
| - H2[*linear_filter_based_delay].end(), echo_path_gain_.begin());
|
| - constexpr float kEchoPathGainHeadroom = 10.f;
|
| - std::for_each(
|
| - echo_path_gain_.begin(), echo_path_gain_.end(),
|
| - [kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; });
|
| +
|
| + // Return zero residual echo power when a headset is detected.
|
| + if (aec_state.HeadsetDetected()) {
|
| + R2->fill(0.f);
|
| + R2_old_.fill(0.f);
|
| + R2_hold_counter_.fill(0.f);
|
| + return;
|
| }
|
|
|
| - // Counts the blocks since saturation.
|
| - if (aec_state.SaturatedCapture()) {
|
| - blocks_since_last_saturation_ = 0;
|
| + // Estimate the echo generating signal power.
|
| + std::array<float, kFftLengthBy2Plus1> X2;
|
| + if (aec_state.ExternalDelay() || aec_state.FilterDelay()) {
|
| + const int delay =
|
| + static_cast<int>(aec_state.FilterDelay() ? *aec_state.FilterDelay()
|
| + : *aec_state.ExternalDelay());
|
| + // Computes the spectral power over that blocks surrounding the delauy..
|
| + EchoGeneratingPower(
|
| + render_buffer, std::max(0, delay - 1),
|
| + std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2);
|
| } else {
|
| - ++blocks_since_last_saturation_;
|
| + // Computes the spectral power over that last 30 blocks.
|
| + EchoGeneratingPower(render_buffer, 0,
|
| + kResidualEchoPowerRenderWindowSize - 1, &X2);
|
| }
|
|
|
| - const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter();
|
| -
|
| - if (aec_state.UsableLinearEstimate()) {
|
| - // Residual echo power estimation when the adaptive filter is reliable.
|
| - RTC_DCHECK(linear_filter_based_delay);
|
| - ErleBasedPowerEstimate(
|
| - aec_state.HeadsetDetected(), X_buffer, using_subtractor_output,
|
| - *linear_filter_based_delay, blocks_since_last_saturation_,
|
| - aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter,
|
| - echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(),
|
| - aec_state.Erl(), R2);
|
| - } else if (aec_state.ModelBasedAecFeasible()) {
|
| - // Residual echo power when the adaptive filter is not reliable but still an
|
| - // external echo path delay is provided (and hence can be estimated).
|
| - RTC_DCHECK(aec_state.ExternalDelay());
|
| - GainBasedPowerEstimate(
|
| - *aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_,
|
| - aec_state.ActiveRenderBlocks(), bands_with_reliable_filter,
|
| - echo_path_gain_, S2_fallback, R2);
|
| - } else if (aec_state.EchoLeakageDetected()) {
|
| - // Residual echo power when an external residual echo detection algorithm
|
| - // has deemed the echo canceller to leak echoes.
|
| - HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2);
|
| + // Estimate the residual echo power.
|
| + if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) {
|
| + LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_,
|
| + R2);
|
| } else {
|
| - // Residual echo power when none of the other cases are fulfilled.
|
| - InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(),
|
| - blocks_since_last_saturation_, S2_fallback, R2);
|
| + NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2);
|
| }
|
| -}
|
|
|
| -void ResidualEchoEstimator::HandleEchoPathChange(
|
| - const EchoPathVariability& echo_path_variability) {
|
| - if (echo_path_variability.AudioPathChanged()) {
|
| - blocks_since_last_saturation_ = 0;
|
| - echo_path_gain_.fill(100.f);
|
| + // If the echo is saturated, estimate the echo power as the maximum echo power
|
| + // with a leakage factor.
|
| + if (aec_state.SaturatedEcho()) {
|
| + constexpr float kSaturationLeakageFactor = 100.f;
|
| + R2->fill((*std::max_element(R2->begin(), R2->end())) *
|
| + kSaturationLeakageFactor);
|
| }
|
| +
|
| + std::copy(R2->begin(), R2->end(), R2_old_.begin());
|
| }
|
|
|
| } // namespace webrtc
|
|
|