webrtc/modules/audio_processing/aec3/aec_state.cc - Issue 2782423003: Major updates to the echo removal functionality in AEC3

Unified Diff: webrtc/modules/audio_processing/aec3/aec_state.cc

Issue 2782423003: Major updates to the echo removal functionality in AEC3 (Closed)

Patch Set: Added initialization of uninitialized vector Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « webrtc/modules/audio_processing/aec3/aec_state.h ('k') | webrtc/modules/audio_processing/aec3/aec_state_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/aec3/aec_state.cc

diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc

index d2c0bddc59adedb11cf3c34f7ee8e6c1cfd9ab41..8e92f5fbdac3a4719428e1275bf966ca73b58151 100644

--- a/webrtc/modules/audio_processing/aec3/aec_state.cc

+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc

@@ -14,6 +14,7 @@

#include <numeric>

#include <vector>

+#include "webrtc/base/array_view.h"

#include "webrtc/base/atomicops.h"

#include "webrtc/base/checks.h"

#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

@@ -21,23 +22,23 @@

namespace webrtc {

namespace {

-constexpr float kMaxFilterEstimateStrength = 1000.f;

+constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;

+constexpr size_t kSaturationLeakageBlocks = 20;

-// Compute the delay of the adaptive filter as the partition with a distinct

-// peak.

-void AnalyzeFilter(

+// Computes delay of the adaptive filter.

+rtc::Optional<size_t> EstimateFilterDelay(

const std::vector<std::array<float, kFftLengthBy2Plus1>>&

- filter_frequency_response,

- std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter,

- std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength,

- rtc::Optional<size_t>* filter_delay) {

- const auto& H2 = filter_frequency_response;

+ adaptive_filter_frequency_response) {

+ const auto& H2 = adaptive_filter_frequency_response;

size_t reliable_delays_sum = 0;

size_t num_reliable_delays = 0;

constexpr size_t kUpperBin = kFftLengthBy2 - 5;

+ constexpr float kMinPeakMargin = 10.f;

+ const size_t kTailPartition = H2.size() - 1;

for (size_t k = 1; k < kUpperBin; ++k) {

+ // Find the maximum of H2[j].

int peak = 0;

for (size_t j = 0; j < H2.size(); ++j) {

if (H2[j][k] > H2[peak][k]) {

@@ -45,43 +46,33 @@ void AnalyzeFilter(

}

- if (H2[peak][k] == 0.f) {

- (*filter_estimate_strength)[k] = 0.f;

- } else if (H2[H2.size() - 1][k] == 0.f) {

- (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength;

- } else {

- (*filter_estimate_strength)[k] = std::min(

- kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]);

- }

- constexpr float kMargin = 10.f;

- if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) {

- (*bands_with_reliable_filter)[k] = true;

+ // Count the peak as a delay only if the peak is sufficiently larger than

+ // the tail.

+ if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) {

reliable_delays_sum += peak;

++num_reliable_delays;

- } else {

- (*bands_with_reliable_filter)[k] = false;

}

- (*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1];

- std::fill(bands_with_reliable_filter->begin() + kUpperBin,

- bands_with_reliable_filter->end(),

- (*bands_with_reliable_filter)[kUpperBin - 1]);

- (*filter_estimate_strength)[0] = (*filter_estimate_strength)[1];

- std::fill(filter_estimate_strength->begin() + kUpperBin,

- filter_estimate_strength->end(),

- (*filter_estimate_strength)[kUpperBin - 1]);

- *filter_delay =

- num_reliable_delays > 20

- ? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays)

- : rtc::Optional<size_t>();

+ // Return no delay if not sufficient delays have been found.

+ if (num_reliable_delays < 21) {

+ return rtc::Optional<size_t>();

+ }

+ const size_t delay = reliable_delays_sum / num_reliable_delays;

+ // Sanity check that the peak is not caused by a false strong DC-component in

+ // the filter.

+ for (size_t k = 1; k < kUpperBin; ++k) {

+ if (H2[delay][k] > H2[delay][0]) {

+ RTC_DCHECK_GT(H2.size(), delay);

+ return rtc::Optional<size_t>(delay);

+ }

+ return rtc::Optional<size_t>();

}

-constexpr int kActiveRenderCounterInitial = 50;

-constexpr int kActiveRenderCounterMax = 200;

-constexpr int kEchoPathChangeCounterInitial = 50;

-constexpr int kEchoPathChangeCounterMax = 3 * 250;

+constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;

+constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond;

} // namespace

@@ -90,76 +81,80 @@ int AecState::instance_count_ = 0;

AecState::AecState()

: data_dumper_(

new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),

- echo_path_change_counter_(kEchoPathChangeCounterInitial),

- active_render_counter_(kActiveRenderCounterInitial) {

- bands_with_reliable_filter_.fill(false);

- filter_estimate_strength_.fill(0.f);

+ echo_path_change_counter_(kEchoPathChangeCounterInitial) {}

AecState::~AecState() = default;

+void AecState::HandleEchoPathChange(

+ const EchoPathVariability& echo_path_variability) {

+ if (echo_path_variability.AudioPathChanged()) {

+ blocks_since_last_saturation_ = 0;

+ active_render_blocks_ = 0;

+ echo_path_change_counter_ = kEchoPathChangeCounterMax;

+ usable_linear_estimate_ = false;

+ echo_leakage_detected_ = false;

+ capture_signal_saturation_ = false;

+ echo_saturation_ = false;

+ headset_detected_ = false;

+ previous_max_sample_ = 0.f;

+ }

void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&

- filter_frequency_response,

+ adaptive_filter_frequency_response,

const rtc::Optional<size_t>& external_delay_samples,

- const RenderBuffer& X_buffer,

+ const RenderBuffer& render_buffer,

const std::array<float, kFftLengthBy2Plus1>& E2_main,

- const std::array<float, kFftLengthBy2Plus1>& E2_shadow,

const std::array<float, kFftLengthBy2Plus1>& Y2,

rtc::ArrayView<const float> x,

- const EchoPathVariability& echo_path_variability,

bool echo_leakage_detected) {

- filter_length_ = filter_frequency_response.size();

- AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_,

- &filter_estimate_strength_, &filter_delay_);

- // Compute the externally provided delay in partitions. The truncation is

- // intended here.

+ // Store input parameters.

+ echo_leakage_detected_ = echo_leakage_detected;

+ // Update counters.

+ const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);

+ const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;

+ active_render_blocks_ += active_render_block ? 1 : 0;

+ --echo_path_change_counter_;

+ // Estimate delays.

+ filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);

external_delay_ =

external_delay_samples

? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)

: rtc::Optional<size_t>();

- const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);

- active_render_blocks_ =

- echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1;

- echo_path_change_counter_ = echo_path_variability.AudioPathChanged()

- ? kEchoPathChangeCounterMax

- : echo_path_change_counter_ - 1;

- active_render_counter_ = x_energy > 10000.f * kFftLengthBy2

- ? kActiveRenderCounterMax

- : active_render_counter_ - 1;

- usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0;

- echo_leakage_detected_ = echo_leakage_detected;

- model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_;

- if (usable_linear_estimate_) {

- const auto& X2 = X_buffer.Spectrum(*filter_delay_);

- // TODO(peah): Expose these as stats.

+ // Update the ERL and ERLE measures.

+ if (filter_delay_ && echo_path_change_counter_ <= 0) {

+ const auto& X2 = render_buffer.Spectrum(*filter_delay_);

erle_estimator_.Update(X2, Y2, E2_main);

erl_estimator_.Update(X2, Y2);

-// TODO(peah): Add working functionality for headset detection. Until the

-// functionality for that is working the headset detector is hardcoded to detect

-// no headset.

-#if 0

- const auto& erl = erl_estimator_.Erl();

- const int low_erl_band_count = std::count_if(

- erl.begin(), erl.end(), [](float a) { return a <= 0.1f; });

- const int noisy_band_count = std::count_if(

- filter_estimate_strength_.begin(), filter_estimate_strength_.end(),

- [](float a) { return a <= 10.f; });

- headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20;

-#endif

- headset_detected_ = false;

- } else {

- headset_detected_ = false;

}

+ // Detect and flag echo saturation.

+ RTC_DCHECK_LT(0, x.size());

+ const float max_sample = fabs(*std::max_element(

+ x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));

+ const bool saturated_echo =

+ previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();

+ previous_max_sample_ = max_sample;

+ // Counts the blocks since saturation.

+ blocks_since_last_saturation_ =

+ saturated_echo ? 0 : blocks_since_last_saturation_ + 1;

+ echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;

+ // Flag whether the linear filter estimate is usable.

+ usable_linear_estimate_ =

+ (!echo_saturation_) &&

+ active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&

+ filter_delay_ && echo_path_change_counter_ <= 0;

+ // After an amount of active render samples for which an echo should have been

+ // detected in the capture signal if the ERL was not infinite, flag that a

+ // headset is used.

+ headset_detected_ = !external_delay_ && !filter_delay_ &&

+ active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;

}

} // namespace webrtc

« no previous file with comments | « webrtc/modules/audio_processing/aec3/aec_state.h ('k') | webrtc/modules/audio_processing/aec3/aec_state_unittest.cc » ('j') | no next file with comments »