Index: webrtc/modules/audio_processing/aec3/aec_state.cc |
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc |
index 3840ef960133e142c33a52cd294ff7e6748467f8..aa389c870cfadb90d3c84034f953384fc4badfa6 100644 |
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc |
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc |
@@ -78,11 +78,11 @@ constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond; |
int AecState::instance_count_ = 0; |
-AecState::AecState(float echo_decay) |
+AecState::AecState(float reverb_decay) |
: data_dumper_( |
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
echo_path_change_counter_(kEchoPathChangeCounterInitial), |
- echo_decay_factor_(echo_decay) {} |
+ reverb_decay_(reverb_decay) {} |
AecState::~AecState() = default; |
@@ -111,12 +111,18 @@ void AecState::HandleEchoPathChange( |
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
adaptive_filter_frequency_response, |
+ const std::array<float, kAdaptiveFilterTimeDomainLength>& |
+ adaptive_filter_impulse_response, |
const rtc::Optional<size_t>& external_delay_samples, |
const RenderBuffer& render_buffer, |
const std::array<float, kFftLengthBy2Plus1>& E2_main, |
const std::array<float, kFftLengthBy2Plus1>& Y2, |
rtc::ArrayView<const float> x, |
+ const std::array<float, kBlockSize>& s, |
bool echo_leakage_detected) { |
+ // Update the echo audibility evaluator. |
+ echo_audibility_.Update(x, s); |
+ |
// Store input parameters. |
echo_leakage_detected_ = echo_leakage_detected; |
@@ -179,6 +185,126 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
!external_delay_ && !filter_delay_ && |
(!render_received_ || |
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks); |
+ |
+ // Update the room reverb estimate. |
+ UpdateReverb(adaptive_filter_impulse_response); |
+} |
+ |
+void AecState::UpdateReverb( |
+ const std::array<float, kAdaptiveFilterTimeDomainLength>& |
+ impulse_response) { |
+ if ((!(filter_delay_ && usable_linear_estimate_)) || |
+ (*filter_delay_ > kAdaptiveFilterLength - 4)) { |
+ return; |
+ } |
+ |
+ // Form the data to match against by squaring the impulse response |
+ // coefficients. |
+ std::array<float, kAdaptiveFilterTimeDomainLength> matching_data; |
+ std::transform(impulse_response.begin(), impulse_response.end(), |
+ matching_data.begin(), [](float a) { return a * a; }); |
+ |
+ // Avoid matching against noise in the model by subtracting an estimate of the |
+ // model noise power. |
+ constexpr size_t kTailLength = 64; |
+ constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength; |
+ const float tail_power = *std::max_element(matching_data.begin() + tail_index, |
+ matching_data.end()); |
+ std::for_each(matching_data.begin(), matching_data.begin() + tail_index, |
+ [tail_power](float& a) { a = std::max(0.f, a - tail_power); }); |
+ |
+ // Identify the peak index of the impulse response. |
+ const size_t peak_index = *std::max_element( |
+ matching_data.begin(), matching_data.begin() + tail_index); |
+ |
+ if (peak_index + 128 < tail_index) { |
+ size_t start_index = peak_index + 64; |
+ // Compute the matching residual error for the current candidate to match. |
+ float residual_sqr_sum = 0.f; |
+ float d_k = reverb_decay_to_test_; |
+ for (size_t k = start_index; k < tail_index; ++k) { |
+ if (matching_data[start_index + 1] == 0.f) { |
+ break; |
+ } |
+ |
+ float residual = matching_data[k] - matching_data[peak_index] * d_k; |
+ residual_sqr_sum += residual * residual; |
+ d_k *= reverb_decay_to_test_; |
+ } |
+ |
+ // If needed, update the best candidate for the reverb decay. |
+ if (reverb_decay_candidate_residual_ < 0.f || |
+ residual_sqr_sum < reverb_decay_candidate_residual_) { |
+ reverb_decay_candidate_residual_ = residual_sqr_sum; |
+ reverb_decay_candidate_ = reverb_decay_to_test_; |
+ } |
+ } |
+ |
+ // Compute the next reverb candidate to evaluate such that all candidates will |
+ // be evaluated within one second. |
+ reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond); |
+ |
+ // If all reverb candidates have been evaluated, choose the best one as the |
+ // reverb decay. |
+ if (reverb_decay_to_test_ >= 0.9965f) { |
+ if (reverb_decay_candidate_residual_ < 0.f) { |
+ // Transform the decay to be in the unit of blocks. |
+ reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2); |
+ |
+ // Limit the estimated reverb_decay_ to the maximum one needed in practice |
+ // to minimize the impact of incorrect estimates. |
+ reverb_decay_ = std::min(0.8f, reverb_decay_); |
+ } |
+ reverb_decay_to_test_ = 0.9f; |
+ reverb_decay_candidate_residual_ = -1.f; |
+ } |
+ |
+ // For noisy impulse responses, assume a fixed tail length. |
+ if (tail_power > 0.0005f) { |
+ reverb_decay_ = 0.7f; |
+ } |
+ data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_); |
+ data_dumper_->DumpRaw("aec3_tail_power", tail_power); |
+} |
+ |
+void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, |
+ const std::array<float, kBlockSize>& s) { |
+ auto result_x = std::minmax_element(x.begin(), x.end()); |
+ auto result_s = std::minmax_element(s.begin(), s.end()); |
+ const float x_abs = |
+ std::max(std::abs(*result_x.first), std::abs(*result_x.second)); |
+ const float s_abs = |
+ std::max(std::abs(*result_s.first), std::abs(*result_s.second)); |
+ |
+ if (x_abs < 5.f) { |
+ ++low_farend_counter_; |
+ } else { |
+ low_farend_counter_ = 0; |
+ } |
+ |
+ // The echo is deemed as not audible if the echo estimate is on the level of |
+ // the quantization noise in the FFTs and the nearend level is sufficiently |
+ // strong to mask that by ensuring that the playout and AGC gains do not boost |
+ // any residual echo that is below the quantization noise level. Furthermore, |
+ // cases where the render signal is very close to zero are also identified as |
+ // not producing audible echo. |
+ inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f; |
+ inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20; |
+} |
+ |
+void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) { |
+ const float e_max = *std::max_element(e.begin(), e.end()); |
+ const float e_min = *std::min_element(e.begin(), e.end()); |
+ const float e_abs = std::max(std::abs(e_max), std::abs(e_min)); |
+ |
+ if (max_nearend_ < e_abs) { |
+ max_nearend_ = e_abs; |
+ max_nearend_counter_ = 0; |
+ } else { |
+ if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) { |
+ max_nearend_ *= 0.995f; |
+ } |
+ } |
} |
} // namespace webrtc |