Index: webrtc/modules/audio_processing/aec3/aec_state.cc |
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc |
index d2c0bddc59adedb11cf3c34f7ee8e6c1cfd9ab41..9c99300e67808d1938d836fe751ba5862d680d76 100644 |
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc |
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc |
@@ -14,6 +14,7 @@ |
#include <numeric> |
#include <vector> |
+#include "webrtc/base/array_view.h" |
#include "webrtc/base/atomicops.h" |
#include "webrtc/base/checks.h" |
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" |
@@ -21,23 +22,23 @@ |
namespace webrtc { |
namespace { |
-constexpr float kMaxFilterEstimateStrength = 1000.f; |
+constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond; |
+constexpr size_t kSaturationLeakageBlocks = 20; |
-// Compute the delay of the adaptive filter as the partition with a distinct |
-// peak. |
-void AnalyzeFilter( |
+// Computes delay of the adaptive filter. |
+rtc::Optional<size_t> EstimateFilterDelay( |
const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
- filter_frequency_response, |
- std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter, |
- std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength, |
- rtc::Optional<size_t>* filter_delay) { |
- const auto& H2 = filter_frequency_response; |
+ adaptive_filter_frequency_response) { |
+ const auto& H2 = adaptive_filter_frequency_response; |
size_t reliable_delays_sum = 0; |
size_t num_reliable_delays = 0; |
constexpr size_t kUpperBin = kFftLengthBy2 - 5; |
+ constexpr float kMinPeakMargin = 10.f; |
+ const size_t kTailPartition = H2.size() - 1; |
for (size_t k = 1; k < kUpperBin; ++k) { |
+ // Find the maximum of H2[j]. |
int peak = 0; |
for (size_t j = 0; j < H2.size(); ++j) { |
if (H2[j][k] > H2[peak][k]) { |
@@ -45,43 +46,32 @@ void AnalyzeFilter( |
} |
} |
- if (H2[peak][k] == 0.f) { |
- (*filter_estimate_strength)[k] = 0.f; |
- } else if (H2[H2.size() - 1][k] == 0.f) { |
- (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength; |
- } else { |
- (*filter_estimate_strength)[k] = std::min( |
- kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]); |
- } |
- |
- constexpr float kMargin = 10.f; |
- if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) { |
- (*bands_with_reliable_filter)[k] = true; |
+ // Count the peak as a delay only if the peak is sufficiently larger than |
+ // the tail. |
+ if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) { |
reliable_delays_sum += peak; |
++num_reliable_delays; |
- } else { |
- (*bands_with_reliable_filter)[k] = false; |
} |
} |
- (*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1]; |
- std::fill(bands_with_reliable_filter->begin() + kUpperBin, |
- bands_with_reliable_filter->end(), |
- (*bands_with_reliable_filter)[kUpperBin - 1]); |
- (*filter_estimate_strength)[0] = (*filter_estimate_strength)[1]; |
- std::fill(filter_estimate_strength->begin() + kUpperBin, |
- filter_estimate_strength->end(), |
- (*filter_estimate_strength)[kUpperBin - 1]); |
- |
- *filter_delay = |
- num_reliable_delays > 20 |
- ? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays) |
- : rtc::Optional<size_t>(); |
+ |
+ // Return no delay if not sufficient delays have been found. |
+ if (num_reliable_delays < 21) { |
+ return rtc::Optional<size_t>(); |
+ } |
+ |
+ // Sanity check that the peak is not caused by a false strong DC-component in |
+ // the filter. |
ivoc
2017/04/05 15:21:25
I think it makes more sense to move this comment j
peah-webrtc
2017/04/06 07:20:32
Done.
|
+ const size_t delay = reliable_delays_sum / num_reliable_delays; |
ivoc
2017/04/05 15:21:25
As a sanity check I think it would be good to add
peah-webrtc
2017/04/06 07:20:32
Done.
|
+ for (size_t k = 1; k < kUpperBin; ++k) { |
+ if (H2[delay][k] > H2[delay][0]) { |
+ return rtc::Optional<size_t>(delay); |
+ } |
+ } |
+ return rtc::Optional<size_t>(); |
} |
-constexpr int kActiveRenderCounterInitial = 50; |
-constexpr int kActiveRenderCounterMax = 200; |
-constexpr int kEchoPathChangeCounterInitial = 50; |
-constexpr int kEchoPathChangeCounterMax = 3 * 250; |
+constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; |
+constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond; |
} // namespace |
@@ -90,76 +80,79 @@ int AecState::instance_count_ = 0; |
AecState::AecState() |
: data_dumper_( |
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
- echo_path_change_counter_(kEchoPathChangeCounterInitial), |
- active_render_counter_(kActiveRenderCounterInitial) { |
- bands_with_reliable_filter_.fill(false); |
- filter_estimate_strength_.fill(0.f); |
-} |
+ echo_path_change_counter_(kEchoPathChangeCounterInitial) {} |
AecState::~AecState() = default; |
+void AecState::HandleEchoPathChange( |
+ const EchoPathVariability& echo_path_variability) { |
+ if (echo_path_variability.AudioPathChanged()) { |
+ blocks_since_last_saturation_ = 0; |
+ active_render_blocks_ = 0; |
+ echo_path_change_counter_ = kEchoPathChangeCounterMax; |
+ usable_linear_estimate_ = false; |
+ echo_leakage_detected_ = false; |
+ capture_signal_saturation_ = false; |
+ echo_saturation_ = false; |
+ headset_detected_ = false; |
+ previous_max_sample_ = 0.f; |
+ } |
+} |
+ |
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
- filter_frequency_response, |
+ adaptive_filter_frequency_response, |
const rtc::Optional<size_t>& external_delay_samples, |
- const RenderBuffer& X_buffer, |
+ const RenderBuffer& render_buffer, |
const std::array<float, kFftLengthBy2Plus1>& E2_main, |
- const std::array<float, kFftLengthBy2Plus1>& E2_shadow, |
const std::array<float, kFftLengthBy2Plus1>& Y2, |
rtc::ArrayView<const float> x, |
- const EchoPathVariability& echo_path_variability, |
bool echo_leakage_detected) { |
- filter_length_ = filter_frequency_response.size(); |
- AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_, |
- &filter_estimate_strength_, &filter_delay_); |
- // Compute the externally provided delay in partitions. The truncation is |
- // intended here. |
+ // Store input parameters. |
+ echo_leakage_detected_ = echo_leakage_detected; |
+ |
+ // Update counters. |
+ const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); |
+ const bool active_render_block = x_energy > 10000.f * kFftLengthBy2; |
+ active_render_blocks_ += active_render_block ? 1 : 0; |
+ --echo_path_change_counter_; |
+ |
+ // Estimate delays. |
+ filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); |
external_delay_ = |
external_delay_samples |
? rtc::Optional<size_t>(*external_delay_samples / kBlockSize) |
: rtc::Optional<size_t>(); |
- const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); |
- |
- active_render_blocks_ = |
- echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1; |
- |
- echo_path_change_counter_ = echo_path_variability.AudioPathChanged() |
- ? kEchoPathChangeCounterMax |
- : echo_path_change_counter_ - 1; |
- active_render_counter_ = x_energy > 10000.f * kFftLengthBy2 |
- ? kActiveRenderCounterMax |
- : active_render_counter_ - 1; |
- |
- usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0; |
- |
- echo_leakage_detected_ = echo_leakage_detected; |
- |
- model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_; |
- |
- if (usable_linear_estimate_) { |
- const auto& X2 = X_buffer.Spectrum(*filter_delay_); |
- |
- // TODO(peah): Expose these as stats. |
+ // Update the ERL and ERLE measures. |
+ if (filter_delay_ && echo_path_change_counter_ <= 0) { |
+ const auto& X2 = render_buffer.Spectrum(*filter_delay_); |
erle_estimator_.Update(X2, Y2, E2_main); |
erl_estimator_.Update(X2, Y2); |
- |
-// TODO(peah): Add working functionality for headset detection. Until the |
-// functionality for that is working the headset detector is hardcoded to detect |
-// no headset. |
-#if 0 |
- const auto& erl = erl_estimator_.Erl(); |
- const int low_erl_band_count = std::count_if( |
- erl.begin(), erl.end(), [](float a) { return a <= 0.1f; }); |
- |
- const int noisy_band_count = std::count_if( |
- filter_estimate_strength_.begin(), filter_estimate_strength_.end(), |
- [](float a) { return a <= 10.f; }); |
- headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20; |
-#endif |
- headset_detected_ = false; |
- } else { |
- headset_detected_ = false; |
} |
+ |
+ // Detect and flag echo saturation. |
+ const float max_sample = fabs(*std::max_element( |
ivoc
2017/04/05 15:21:25
A DCHECK for the size of x would be nice (max_elem
peah-webrtc
2017/04/06 07:20:32
Good point.
Done.
|
+ x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); |
+ const bool saturated_echo = |
+ previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture(); |
+ previous_max_sample_ = max_sample; |
+ |
+ // Counts the blocks since saturation. |
+ blocks_since_last_saturation_ = |
+ saturated_echo ? 0 : blocks_since_last_saturation_ + 1; |
+ echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; |
+ |
+ // Flag whether the linear filter estimate is usable. |
+ usable_linear_estimate_ = |
+ (!echo_saturation_) && |
+ active_render_blocks_ > kEchoPathChangeConvergenceBlocks && |
+ filter_delay_ && echo_path_change_counter_ <= 0; |
+ |
+ // After an amount of active render samples for which an echo should have been |
+ // detected in the capture signal if the ERL was not infinite, flag that a |
+ // headset is used. |
+ headset_detected_ = !external_delay_ && !filter_delay_ && |
+ active_render_blocks_ >= kEchoPathChangeConvergenceBlocks; |
} |
} // namespace webrtc |