| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_processing/aec3/aec_state.h" | 11 #include "webrtc/modules/audio_processing/aec3/aec_state.h" |
| 12 | 12 |
| 13 #include <math.h> | 13 #include <math.h> |
| 14 #include <numeric> | 14 #include <numeric> |
| 15 #include <vector> | 15 #include <vector> |
| 16 | 16 |
| 17 #include "webrtc/base/array_view.h" |
| 17 #include "webrtc/base/atomicops.h" | 18 #include "webrtc/base/atomicops.h" |
| 18 #include "webrtc/base/checks.h" | 19 #include "webrtc/base/checks.h" |
| 19 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" | 20 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" |
| 20 | 21 |
| 21 namespace webrtc { | 22 namespace webrtc { |
| 22 namespace { | 23 namespace { |
| 23 | 24 |
| 24 constexpr float kMaxFilterEstimateStrength = 1000.f; | 25 constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond; |
| 26 constexpr size_t kSaturationLeakageBlocks = 20; |
| 25 | 27 |
| 26 // Compute the delay of the adaptive filter as the partition with a distinct | 28 // Computes delay of the adaptive filter. |
| 27 // peak. | 29 rtc::Optional<size_t> EstimateFilterDelay( |
| 28 void AnalyzeFilter( | |
| 29 const std::vector<std::array<float, kFftLengthBy2Plus1>>& | 30 const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
| 30 filter_frequency_response, | 31 adaptive_filter_frequency_response) { |
| 31 std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter, | 32 const auto& H2 = adaptive_filter_frequency_response; |
| 32 std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength, | |
| 33 rtc::Optional<size_t>* filter_delay) { | |
| 34 const auto& H2 = filter_frequency_response; | |
| 35 | 33 |
| 36 size_t reliable_delays_sum = 0; | 34 size_t reliable_delays_sum = 0; |
| 37 size_t num_reliable_delays = 0; | 35 size_t num_reliable_delays = 0; |
| 38 | 36 |
| 39 constexpr size_t kUpperBin = kFftLengthBy2 - 5; | 37 constexpr size_t kUpperBin = kFftLengthBy2 - 5; |
| 38 constexpr float kMinPeakMargin = 10.f; |
| 39 const size_t kTailPartition = H2.size() - 1; |
| 40 for (size_t k = 1; k < kUpperBin; ++k) { | 40 for (size_t k = 1; k < kUpperBin; ++k) { |
| 41 // Find the maximum of H2[j]. |
| 41 int peak = 0; | 42 int peak = 0; |
| 42 for (size_t j = 0; j < H2.size(); ++j) { | 43 for (size_t j = 0; j < H2.size(); ++j) { |
| 43 if (H2[j][k] > H2[peak][k]) { | 44 if (H2[j][k] > H2[peak][k]) { |
| 44 peak = j; | 45 peak = j; |
| 45 } | 46 } |
| 46 } | 47 } |
| 47 | 48 |
| 48 if (H2[peak][k] == 0.f) { | 49 // Count the peak as a delay only if the peak is sufficiently larger than |
| 49 (*filter_estimate_strength)[k] = 0.f; | 50 // the tail. |
| 50 } else if (H2[H2.size() - 1][k] == 0.f) { | 51 if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) { |
| 51 (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength; | |
| 52 } else { | |
| 53 (*filter_estimate_strength)[k] = std::min( | |
| 54 kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]); | |
| 55 } | |
| 56 | |
| 57 constexpr float kMargin = 10.f; | |
| 58 if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) { | |
| 59 (*bands_with_reliable_filter)[k] = true; | |
| 60 reliable_delays_sum += peak; | 52 reliable_delays_sum += peak; |
| 61 ++num_reliable_delays; | 53 ++num_reliable_delays; |
| 62 } else { | |
| 63 (*bands_with_reliable_filter)[k] = false; | |
| 64 } | 54 } |
| 65 } | 55 } |
| 66 (*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1]; | |
| 67 std::fill(bands_with_reliable_filter->begin() + kUpperBin, | |
| 68 bands_with_reliable_filter->end(), | |
| 69 (*bands_with_reliable_filter)[kUpperBin - 1]); | |
| 70 (*filter_estimate_strength)[0] = (*filter_estimate_strength)[1]; | |
| 71 std::fill(filter_estimate_strength->begin() + kUpperBin, | |
| 72 filter_estimate_strength->end(), | |
| 73 (*filter_estimate_strength)[kUpperBin - 1]); | |
| 74 | 56 |
| 75 *filter_delay = | 57 // Return no delay if not sufficient delays have been found. |
| 76 num_reliable_delays > 20 | 58 if (num_reliable_delays < 21) { |
| 77 ? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays) | 59 return rtc::Optional<size_t>(); |
| 78 : rtc::Optional<size_t>(); | 60 } |
| 61 |
| 62 const size_t delay = reliable_delays_sum / num_reliable_delays; |
| 63 // Sanity check that the peak is not caused by a false strong DC-component in |
| 64 // the filter. |
| 65 for (size_t k = 1; k < kUpperBin; ++k) { |
| 66 if (H2[delay][k] > H2[delay][0]) { |
| 67 RTC_DCHECK_GT(H2.size(), delay); |
| 68 return rtc::Optional<size_t>(delay); |
| 69 } |
| 70 } |
| 71 return rtc::Optional<size_t>(); |
| 79 } | 72 } |
| 80 | 73 |
| 81 constexpr int kActiveRenderCounterInitial = 50; | 74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; |
| 82 constexpr int kActiveRenderCounterMax = 200; | 75 constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond; |
| 83 constexpr int kEchoPathChangeCounterInitial = 50; | |
| 84 constexpr int kEchoPathChangeCounterMax = 3 * 250; | |
| 85 | 76 |
| 86 } // namespace | 77 } // namespace |
| 87 | 78 |
| 88 int AecState::instance_count_ = 0; | 79 int AecState::instance_count_ = 0; |
| 89 | 80 |
| 90 AecState::AecState() | 81 AecState::AecState() |
| 91 : data_dumper_( | 82 : data_dumper_( |
| 92 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), | 83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
| 93 echo_path_change_counter_(kEchoPathChangeCounterInitial), | 84 echo_path_change_counter_(kEchoPathChangeCounterInitial) {} |
| 94 active_render_counter_(kActiveRenderCounterInitial) { | |
| 95 bands_with_reliable_filter_.fill(false); | |
| 96 filter_estimate_strength_.fill(0.f); | |
| 97 } | |
| 98 | 85 |
| 99 AecState::~AecState() = default; | 86 AecState::~AecState() = default; |
| 100 | 87 |
| 88 void AecState::HandleEchoPathChange( |
| 89 const EchoPathVariability& echo_path_variability) { |
| 90 if (echo_path_variability.AudioPathChanged()) { |
| 91 blocks_since_last_saturation_ = 0; |
| 92 active_render_blocks_ = 0; |
| 93 echo_path_change_counter_ = kEchoPathChangeCounterMax; |
| 94 usable_linear_estimate_ = false; |
| 95 echo_leakage_detected_ = false; |
| 96 capture_signal_saturation_ = false; |
| 97 echo_saturation_ = false; |
| 98 headset_detected_ = false; |
| 99 previous_max_sample_ = 0.f; |
| 100 } |
| 101 } |
| 102 |
| 101 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& | 103 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
| 102 filter_frequency_response, | 104 adaptive_filter_frequency_response, |
| 103 const rtc::Optional<size_t>& external_delay_samples, | 105 const rtc::Optional<size_t>& external_delay_samples, |
| 104 const RenderBuffer& X_buffer, | 106 const RenderBuffer& render_buffer, |
| 105 const std::array<float, kFftLengthBy2Plus1>& E2_main, | 107 const std::array<float, kFftLengthBy2Plus1>& E2_main, |
| 106 const std::array<float, kFftLengthBy2Plus1>& E2_shadow, | |
| 107 const std::array<float, kFftLengthBy2Plus1>& Y2, | 108 const std::array<float, kFftLengthBy2Plus1>& Y2, |
| 108 rtc::ArrayView<const float> x, | 109 rtc::ArrayView<const float> x, |
| 109 const EchoPathVariability& echo_path_variability, | |
| 110 bool echo_leakage_detected) { | 110 bool echo_leakage_detected) { |
| 111 filter_length_ = filter_frequency_response.size(); | 111 // Store input parameters. |
| 112 AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_, | 112 echo_leakage_detected_ = echo_leakage_detected; |
| 113 &filter_estimate_strength_, &filter_delay_); | 113 |
| 114 // Compute the externally provided delay in partitions. The truncation is | 114 // Update counters. |
| 115 // intended here. | 115 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); |
| 116 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2; |
| 117 active_render_blocks_ += active_render_block ? 1 : 0; |
| 118 --echo_path_change_counter_; |
| 119 |
| 120 // Estimate delays. |
| 121 filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); |
| 116 external_delay_ = | 122 external_delay_ = |
| 117 external_delay_samples | 123 external_delay_samples |
| 118 ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize) | 124 ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize) |
| 119 : rtc::Optional<size_t>(); | 125 : rtc::Optional<size_t>(); |
| 120 | 126 |
| 121 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); | 127 // Update the ERL and ERLE measures. |
| 122 | 128 if (filter_delay_ && echo_path_change_counter_ <= 0) { |
| 123 active_render_blocks_ = | 129 const auto& X2 = render_buffer.Spectrum(*filter_delay_); |
| 124 echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1; | |
| 125 | |
| 126 echo_path_change_counter_ = echo_path_variability.AudioPathChanged() | |
| 127 ? kEchoPathChangeCounterMax | |
| 128 : echo_path_change_counter_ - 1; | |
| 129 active_render_counter_ = x_energy > 10000.f * kFftLengthBy2 | |
| 130 ? kActiveRenderCounterMax | |
| 131 : active_render_counter_ - 1; | |
| 132 | |
| 133 usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0; | |
| 134 | |
| 135 echo_leakage_detected_ = echo_leakage_detected; | |
| 136 | |
| 137 model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_; | |
| 138 | |
| 139 if (usable_linear_estimate_) { | |
| 140 const auto& X2 = X_buffer.Spectrum(*filter_delay_); | |
| 141 | |
| 142 // TODO(peah): Expose these as stats. | |
| 143 erle_estimator_.Update(X2, Y2, E2_main); | 130 erle_estimator_.Update(X2, Y2, E2_main); |
| 144 erl_estimator_.Update(X2, Y2); | 131 erl_estimator_.Update(X2, Y2); |
| 132 } |
| 145 | 133 |
| 146 // TODO(peah): Add working functionality for headset detection. Until the | 134 // Detect and flag echo saturation. |
| 147 // functionality for that is working the headset detector is hardcoded to detect | 135 RTC_DCHECK_LT(0, x.size()); |
| 148 // no headset. | 136 const float max_sample = fabs(*std::max_element( |
| 149 #if 0 | 137 x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); |
| 150 const auto& erl = erl_estimator_.Erl(); | 138 const bool saturated_echo = |
| 151 const int low_erl_band_count = std::count_if( | 139 previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture(); |
| 152 erl.begin(), erl.end(), [](float a) { return a <= 0.1f; }); | 140 previous_max_sample_ = max_sample; |
| 153 | 141 |
| 154 const int noisy_band_count = std::count_if( | 142 // Counts the blocks since saturation. |
| 155 filter_estimate_strength_.begin(), filter_estimate_strength_.end(), | 143 blocks_since_last_saturation_ = |
| 156 [](float a) { return a <= 10.f; }); | 144 saturated_echo ? 0 : blocks_since_last_saturation_ + 1; |
| 157 headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20; | 145 echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; |
| 158 #endif | 146 |
| 159 headset_detected_ = false; | 147 // Flag whether the linear filter estimate is usable. |
| 160 } else { | 148 usable_linear_estimate_ = |
| 161 headset_detected_ = false; | 149 (!echo_saturation_) && |
| 162 } | 150 active_render_blocks_ > kEchoPathChangeConvergenceBlocks && |
| 151 filter_delay_ && echo_path_change_counter_ <= 0; |
| 152 |
| 153 // After an amount of active render samples for which an echo should have been |
| 154 // detected in the capture signal if the ERL was not infinite, flag that a |
| 155 // headset is used. |
| 156 headset_detected_ = !external_delay_ && !filter_delay_ && |
| 157 active_render_blocks_ >= kEchoPathChangeConvergenceBlocks; |
| 163 } | 158 } |
| 164 | 159 |
| 165 } // namespace webrtc | 160 } // namespace webrtc |
| OLD | NEW |