OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
71 return rtc::Optional<size_t>(); | 71 return rtc::Optional<size_t>(); |
72 } | 72 } |
73 | 73 |
74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; | 74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; |
75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond; | 75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond; |
76 | 76 |
77 } // namespace | 77 } // namespace |
78 | 78 |
79 int AecState::instance_count_ = 0; | 79 int AecState::instance_count_ = 0; |
80 | 80 |
81 AecState::AecState(float echo_decay) | 81 AecState::AecState(float reverb_decay) |
82 : data_dumper_( | 82 : data_dumper_( |
83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), | 83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
84 echo_path_change_counter_(kEchoPathChangeCounterInitial), | 84 echo_path_change_counter_(kEchoPathChangeCounterInitial), |
85 echo_decay_factor_(echo_decay) {} | 85 reverb_decay_(reverb_decay) {} |
86 | 86 |
87 AecState::~AecState() = default; | 87 AecState::~AecState() = default; |
88 | 88 |
89 void AecState::HandleEchoPathChange( | 89 void AecState::HandleEchoPathChange( |
90 const EchoPathVariability& echo_path_variability) { | 90 const EchoPathVariability& echo_path_variability) { |
91 if (echo_path_variability.AudioPathChanged()) { | 91 if (echo_path_variability.AudioPathChanged()) { |
92 blocks_since_last_saturation_ = 0; | 92 blocks_since_last_saturation_ = 0; |
93 usable_linear_estimate_ = false; | 93 usable_linear_estimate_ = false; |
94 echo_leakage_detected_ = false; | 94 echo_leakage_detected_ = false; |
95 capture_signal_saturation_ = false; | 95 capture_signal_saturation_ = false; |
96 echo_saturation_ = false; | 96 echo_saturation_ = false; |
97 previous_max_sample_ = 0.f; | 97 previous_max_sample_ = 0.f; |
98 | 98 |
99 if (echo_path_variability.delay_change) { | 99 if (echo_path_variability.delay_change) { |
100 force_zero_gain_counter_ = 0; | 100 force_zero_gain_counter_ = 0; |
101 blocks_with_filter_adaptation_ = 0; | 101 blocks_with_filter_adaptation_ = 0; |
102 render_received_ = false; | 102 render_received_ = false; |
103 force_zero_gain_ = true; | 103 force_zero_gain_ = true; |
104 echo_path_change_counter_ = kEchoPathChangeCounterMax; | 104 echo_path_change_counter_ = kEchoPathChangeCounterMax; |
105 } | 105 } |
106 if (echo_path_variability.gain_change) { | 106 if (echo_path_variability.gain_change) { |
107 echo_path_change_counter_ = kEchoPathChangeCounterInitial; | 107 echo_path_change_counter_ = kEchoPathChangeCounterInitial; |
108 } | 108 } |
109 } | 109 } |
110 } | 110 } |
111 | 111 |
112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& | 112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
113 adaptive_filter_frequency_response, | 113 adaptive_filter_frequency_response, |
| 114 const std::array<float, kAdaptiveFilterTimeDomainLength>& |
| 115 adaptive_filter_impulse_response, |
114 const rtc::Optional<size_t>& external_delay_samples, | 116 const rtc::Optional<size_t>& external_delay_samples, |
115 const RenderBuffer& render_buffer, | 117 const RenderBuffer& render_buffer, |
116 const std::array<float, kFftLengthBy2Plus1>& E2_main, | 118 const std::array<float, kFftLengthBy2Plus1>& E2_main, |
117 const std::array<float, kFftLengthBy2Plus1>& Y2, | 119 const std::array<float, kFftLengthBy2Plus1>& Y2, |
118 rtc::ArrayView<const float> x, | 120 rtc::ArrayView<const float> x, |
| 121 const std::array<float, kBlockSize>& s, |
119 bool echo_leakage_detected) { | 122 bool echo_leakage_detected) { |
| 123 // Update the echo audibility evaluator. |
| 124 echo_audibility_.Update(x, s); |
| 125 |
120 // Store input parameters. | 126 // Store input parameters. |
121 echo_leakage_detected_ = echo_leakage_detected; | 127 echo_leakage_detected_ = echo_leakage_detected; |
122 | 128 |
123 // Update counters. | 129 // Update counters. |
124 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); | 130 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); |
125 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2; | 131 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2; |
126 if (active_render_block) { | 132 if (active_render_block) { |
127 render_received_ = true; | 133 render_received_ = true; |
128 } | 134 } |
129 blocks_with_filter_adaptation_ += | 135 blocks_with_filter_adaptation_ += |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
172 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) && | 178 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) && |
173 filter_delay_ && echo_path_change_counter_ <= 0; | 179 filter_delay_ && echo_path_change_counter_ <= 0; |
174 | 180 |
175 // After an amount of active render samples for which an echo should have been | 181 // After an amount of active render samples for which an echo should have been |
176 // detected in the capture signal if the ERL was not infinite, flag that a | 182 // detected in the capture signal if the ERL was not infinite, flag that a |
177 // headset is used. | 183 // headset is used. |
178 headset_detected_ = | 184 headset_detected_ = |
179 !external_delay_ && !filter_delay_ && | 185 !external_delay_ && !filter_delay_ && |
180 (!render_received_ || | 186 (!render_received_ || |
181 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks); | 187 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks); |
| 188 |
| 189 // Update the room reverb estimate. |
| 190 UpdateReverb(adaptive_filter_impulse_response); |
| 191 } |
| 192 |
| 193 void AecState::UpdateReverb( |
| 194 const std::array<float, kAdaptiveFilterTimeDomainLength>& |
| 195 impulse_response) { |
| 196 if ((!(filter_delay_ && usable_linear_estimate_)) || |
| 197 (*filter_delay_ > kAdaptiveFilterLength - 4)) { |
| 198 return; |
| 199 } |
| 200 |
| 201 // Form the data to match against by squaring the impulse response |
| 202 // coefficients. |
| 203 std::array<float, kAdaptiveFilterTimeDomainLength> matching_data; |
| 204 std::transform(impulse_response.begin(), impulse_response.end(), |
| 205 matching_data.begin(), [](float a) { return a * a; }); |
| 206 |
| 207 // Avoid matching against noise in the model by subtracting an estimate of the |
| 208 // model noise power. |
| 209 constexpr size_t kTailLength = 64; |
| 210 constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength; |
| 211 const float tail_power = *std::max_element(matching_data.begin() + tail_index, |
| 212 matching_data.end()); |
| 213 std::for_each(matching_data.begin(), matching_data.begin() + tail_index, |
| 214 [tail_power](float& a) { a = std::max(0.f, a - tail_power); }); |
| 215 |
| 216 // Identify the peak index of the impulse response. |
| 217 const size_t peak_index = *std::max_element( |
| 218 matching_data.begin(), matching_data.begin() + tail_index); |
| 219 |
| 220 if (peak_index + 128 < tail_index) { |
| 221 size_t start_index = peak_index + 64; |
| 222 // Compute the matching residual error for the current candidate to match. |
| 223 float residual_sqr_sum = 0.f; |
| 224 float d_k = reverb_decay_to_test_; |
| 225 for (size_t k = start_index; k < tail_index; ++k) { |
| 226 if (matching_data[start_index + 1] == 0.f) { |
| 227 break; |
| 228 } |
| 229 |
| 230 float residual = matching_data[k] - matching_data[peak_index] * d_k; |
| 231 residual_sqr_sum += residual * residual; |
| 232 d_k *= reverb_decay_to_test_; |
| 233 } |
| 234 |
| 235 // If needed, update the best candidate for the reverb decay. |
| 236 if (reverb_decay_candidate_residual_ < 0.f || |
| 237 residual_sqr_sum < reverb_decay_candidate_residual_) { |
| 238 reverb_decay_candidate_residual_ = residual_sqr_sum; |
| 239 reverb_decay_candidate_ = reverb_decay_to_test_; |
| 240 } |
| 241 } |
| 242 |
| 243 // Compute the next reverb candidate to evaluate such that all candidates will |
| 244 // be evaluated within one second. |
| 245 reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond); |
| 246 |
| 247 // If all reverb candidates have been evaluated, choose the best one as the |
| 248 // reverb decay. |
| 249 if (reverb_decay_to_test_ >= 0.9965f) { |
| 250 if (reverb_decay_candidate_residual_ < 0.f) { |
| 251 // Transform the decay to be in the unit of blocks. |
| 252 reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2); |
| 253 |
| 254 // Limit the estimated reverb_decay_ to the maximum one needed in practice |
| 255 // to minimize the impact of incorrect estimates. |
| 256 reverb_decay_ = std::min(0.8f, reverb_decay_); |
| 257 } |
| 258 reverb_decay_to_test_ = 0.9f; |
| 259 reverb_decay_candidate_residual_ = -1.f; |
| 260 } |
| 261 |
| 262 // For noisy impulse responses, assume a fixed tail length. |
| 263 if (tail_power > 0.0005f) { |
| 264 reverb_decay_ = 0.7f; |
| 265 } |
| 266 data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_); |
| 267 data_dumper_->DumpRaw("aec3_tail_power", tail_power); |
| 268 } |
| 269 |
| 270 void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, |
| 271 const std::array<float, kBlockSize>& s) { |
| 272 auto result_x = std::minmax_element(x.begin(), x.end()); |
| 273 auto result_s = std::minmax_element(s.begin(), s.end()); |
| 274 const float x_abs = |
| 275 std::max(std::abs(*result_x.first), std::abs(*result_x.second)); |
| 276 const float s_abs = |
| 277 std::max(std::abs(*result_s.first), std::abs(*result_s.second)); |
| 278 |
| 279 if (x_abs < 5.f) { |
| 280 ++low_farend_counter_; |
| 281 } else { |
| 282 low_farend_counter_ = 0; |
| 283 } |
| 284 |
| 285 // The echo is deemed as not audible if the echo estimate is on the level of |
| 286 // the quantization noise in the FFTs and the nearend level is sufficiently |
| 287 // strong to mask that by ensuring that the playout and AGC gains do not boost |
| 288 // any residual echo that is below the quantization noise level. Furthermore, |
| 289 // cases where the render signal is very close to zero are also identified as |
| 290 // not producing audible echo. |
| 291 inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f; |
| 292 inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20; |
| 293 } |
| 294 |
| 295 void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) { |
| 296 const float e_max = *std::max_element(e.begin(), e.end()); |
| 297 const float e_min = *std::min_element(e.begin(), e.end()); |
| 298 const float e_abs = std::max(std::abs(e_max), std::abs(e_min)); |
| 299 |
| 300 if (max_nearend_ < e_abs) { |
| 301 max_nearend_ = e_abs; |
| 302 max_nearend_counter_ = 0; |
| 303 } else { |
| 304 if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) { |
| 305 max_nearend_ *= 0.995f; |
| 306 } |
| 307 } |
182 } | 308 } |
183 | 309 |
184 } // namespace webrtc | 310 } // namespace webrtc |
OLD | NEW |