Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(904)

Side by Side Diff: webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc

Issue 2823903003: Echo canceller 3 improvements for setups with headsets. (Closed)
Patch Set: Changes in response to reviewer comments Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec3/residual_echo_estimator.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 22 matching lines...) Expand all
33 33
34 // Apply soft noise gate of -78 dBFS. 34 // Apply soft noise gate of -78 dBFS.
35 constexpr float kNoiseGatePower = 27509.42f; 35 constexpr float kNoiseGatePower = 27509.42f;
36 std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) { 36 std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) {
37 if (kNoiseGatePower > a) { 37 if (kNoiseGatePower > a) {
38 a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); 38 a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
39 } 39 }
40 }); 40 });
41 } 41 }
42 42
43 constexpr int kNoiseFloorCounterMax = 50;
44 constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
45
46 // Updates estimate for the power of the stationary noise component in the
47 // render signal.
48 void RenderNoisePower(
49 const RenderBuffer& render_buffer,
50 std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
51 std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
52 RTC_DCHECK(X2_noise_floor);
53 RTC_DCHECK(X2_noise_floor_counter);
54
55 const auto render_power = render_buffer.Spectrum(0);
56 RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
57 RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
58
59 // Estimate the stationary noise power in a minimum statistics manner.
60 for (size_t k = 0; k < render_power.size(); ++k) {
61 // Decrease rapidly.
62 if (render_power[k] < (*X2_noise_floor)[k]) {
63 (*X2_noise_floor)[k] = render_power[k];
64 (*X2_noise_floor_counter)[k] = 0;
65 } else {
66 // Increase in a delayed, leaky manner.
67 if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
68 (*X2_noise_floor)[k] =
69 std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
70 } else {
71 ++(*X2_noise_floor_counter)[k];
72 }
73 }
74 }
75 }
76
77 // Assume a minimum echo path gain of -33 dB for headsets.
78 constexpr float kHeadsetEchoPathGain = 0.0005f;
79
43 } // namespace 80 } // namespace
44 81
45 ResidualEchoEstimator::ResidualEchoEstimator() { 82 ResidualEchoEstimator::ResidualEchoEstimator() {
46 Reset(); 83 Reset();
47 } 84 }
48 85
49 ResidualEchoEstimator::~ResidualEchoEstimator() = default; 86 ResidualEchoEstimator::~ResidualEchoEstimator() = default;
50 87
51 void ResidualEchoEstimator::Estimate( 88 void ResidualEchoEstimator::Estimate(
52 bool using_subtractor_output, 89 bool using_subtractor_output,
53 const AecState& aec_state, 90 const AecState& aec_state,
54 const RenderBuffer& render_buffer, 91 const RenderBuffer& render_buffer,
55 const std::array<float, kFftLengthBy2Plus1>& S2_linear, 92 const std::array<float, kFftLengthBy2Plus1>& S2_linear,
56 const std::array<float, kFftLengthBy2Plus1>& Y2, 93 const std::array<float, kFftLengthBy2Plus1>& Y2,
57 std::array<float, kFftLengthBy2Plus1>* R2) { 94 std::array<float, kFftLengthBy2Plus1>* R2) {
58 RTC_DCHECK(R2); 95 RTC_DCHECK(R2);
59 96
60 // Return zero residual echo power when a headset is detected.
61 if (aec_state.HeadsetDetected()) {
62 if (!headset_detected_cached_) {
63 Reset();
64 headset_detected_cached_ = true;
65 }
66 R2->fill(0.f);
67 return;
68 } else {
69 headset_detected_cached_ = false;
70 }
71
72 const rtc::Optional<size_t> delay = 97 const rtc::Optional<size_t> delay =
73 aec_state.FilterDelay() 98 aec_state.FilterDelay()
74 ? aec_state.FilterDelay() 99 ? aec_state.FilterDelay()
75 : (aec_state.ExternalDelay() ? aec_state.ExternalDelay() 100 : (aec_state.ExternalDelay() ? aec_state.ExternalDelay()
76 : rtc::Optional<size_t>()); 101 : rtc::Optional<size_t>());
77 102
103 // Estimate the power of the stationary noise in the render signal.
104 RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
105
78 // Estimate the residual echo power. 106 // Estimate the residual echo power.
79 const bool use_linear_echo_power = 107 const bool use_linear_echo_power =
80 aec_state.UsableLinearEstimate() && using_subtractor_output; 108 aec_state.UsableLinearEstimate() && using_subtractor_output;
81 if (use_linear_echo_power) { 109 if (use_linear_echo_power && !aec_state.HeadsetDetected()) {
82 RTC_DCHECK(aec_state.FilterDelay()); 110 RTC_DCHECK(aec_state.FilterDelay());
83 const int filter_delay = *aec_state.FilterDelay(); 111 const int filter_delay = *aec_state.FilterDelay();
84 LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); 112 LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
85 AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, 113 AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
86 aec_state.ReverbDecayFactor(), R2); 114 aec_state.ReverbDecayFactor(), R2);
87 } else { 115 } else {
88 // Estimate the echo generating signal power. 116 // Estimate the echo generating signal power.
89 std::array<float, kFftLengthBy2Plus1> X2; 117 std::array<float, kFftLengthBy2Plus1> X2;
90 if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { 118 if (aec_state.ExternalDelay() || aec_state.FilterDelay()) {
91 RTC_DCHECK(delay); 119 RTC_DCHECK(delay);
92 const int delay_use = static_cast<int>(*delay); 120 const int delay_use = static_cast<int>(*delay);
93 121
94 // Computes the spectral power over the blocks surrounding the delay. 122 // Computes the spectral power over the blocks surrounding the delay.
95 RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize); 123 RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize);
96 EchoGeneratingPower( 124 EchoGeneratingPower(
97 render_buffer, std::max(0, delay_use - 1), 125 render_buffer, std::max(0, delay_use - 1),
98 std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2); 126 std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2);
99 } else { 127 } else {
100 // Computes the spectral power over the latest blocks. 128 // Computes the spectral power over the latest blocks.
101 EchoGeneratingPower(render_buffer, 0, 129 EchoGeneratingPower(render_buffer, 0,
102 kResidualEchoPowerRenderWindowSize - 1, &X2); 130 kResidualEchoPowerRenderWindowSize - 1, &X2);
103 } 131 }
104 132
105 NonLinearEstimate(X2, Y2, R2); 133 // Subtract the stationary noise power to avoid stationary noise causing
134 // excessive echo suppression.
135 std::transform(
136 X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
137 [](float a, float b) { return std::max(0.f, a - 10.f * b); });
138
139 NonLinearEstimate(
140 aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain,
141 X2, Y2, R2);
106 AddEchoReverb(*R2, aec_state.SaturatedEcho(), 142 AddEchoReverb(*R2, aec_state.SaturatedEcho(),
107 std::min(static_cast<size_t>(kAdaptiveFilterLength), 143 std::min(static_cast<size_t>(kAdaptiveFilterLength),
108 delay.value_or(kAdaptiveFilterLength)), 144 delay.value_or(kAdaptiveFilterLength)),
109 aec_state.ReverbDecayFactor(), R2); 145 aec_state.ReverbDecayFactor(), R2);
110 } 146 }
111 147
112 // If the echo is saturated, estimate the echo power as the maximum echo power 148 // If the echo is saturated, estimate the echo power as the maximum echo power
113 // with a leakage factor. 149 // with a leakage factor.
114 if (aec_state.SaturatedEcho()) { 150 if (aec_state.SaturatedEcho()) {
115 R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); 151 R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
116 } 152 }
117 153
118 std::copy(R2->begin(), R2->end(), R2_old_.begin()); 154 std::copy(R2->begin(), R2->end(), R2_old_.begin());
119 } 155 }
120 156
121 void ResidualEchoEstimator::Reset() { 157 void ResidualEchoEstimator::Reset() {
158 X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
159 X2_noise_floor_.fill(kNoiseFloorMin);
122 R2_reverb_.fill(0.f); 160 R2_reverb_.fill(0.f);
123 R2_old_.fill(0.f); 161 R2_old_.fill(0.f);
124 R2_hold_counter_.fill(0.f); 162 R2_hold_counter_.fill(0.f);
125 for (auto& S2_k : S2_old_) { 163 for (auto& S2_k : S2_old_) {
126 S2_k.fill(0.f); 164 S2_k.fill(0.f);
127 } 165 }
128 } 166 }
129 167
130 void ResidualEchoEstimator::LinearEstimate( 168 void ResidualEchoEstimator::LinearEstimate(
131 const std::array<float, kFftLengthBy2Plus1>& S2_linear, 169 const std::array<float, kFftLengthBy2Plus1>& S2_linear,
132 const std::array<float, kFftLengthBy2Plus1>& erle, 170 const std::array<float, kFftLengthBy2Plus1>& erle,
133 size_t delay, 171 size_t delay,
134 std::array<float, kFftLengthBy2Plus1>* R2) { 172 std::array<float, kFftLengthBy2Plus1>* R2) {
135 std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f); 173 std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f);
136 std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), 174 std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
137 [](float a, float b) { 175 [](float a, float b) {
138 RTC_DCHECK_LT(0.f, a); 176 RTC_DCHECK_LT(0.f, a);
139 return b / a; 177 return b / a;
140 }); 178 });
141 } 179 }
142 180
143 void ResidualEchoEstimator::NonLinearEstimate( 181 void ResidualEchoEstimator::NonLinearEstimate(
182 float echo_path_gain,
144 const std::array<float, kFftLengthBy2Plus1>& X2, 183 const std::array<float, kFftLengthBy2Plus1>& X2,
145 const std::array<float, kFftLengthBy2Plus1>& Y2, 184 const std::array<float, kFftLengthBy2Plus1>& Y2,
146 std::array<float, kFftLengthBy2Plus1>* R2) { 185 std::array<float, kFftLengthBy2Plus1>* R2) {
147 // Compute preliminary residual echo. 186 // Compute preliminary residual echo.
148 // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
149 // 20 dB.
150 std::transform(X2.begin(), X2.end(), R2->begin(), 187 std::transform(X2.begin(), X2.end(), R2->begin(),
151 [](float a) { return a * kFixedEchoPathGain; }); 188 [echo_path_gain](float a) { return a * echo_path_gain; });
152 189
153 for (size_t k = 0; k < R2->size(); ++k) { 190 for (size_t k = 0; k < R2->size(); ++k) {
154 // Update hold counter. 191 // Update hold counter.
155 R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1; 192 R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1;
156 193
157 // Compute the residual echo by holding a maximum echo power and an echo 194 // Compute the residual echo by holding a maximum echo power and an echo
158 // fading corresponding to a room with an RT60 value of about 50 ms. 195 // fading corresponding to a room with an RT60 value of about 50 ms.
159 (*R2)[k] = R2_hold_counter_[k] < 2 196 (*R2)[k] = R2_hold_counter_[k] < 2
160 ? std::max((*R2)[k], R2_old_[k]) 197 ? std::max((*R2)[k], R2_old_[k])
161 : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]); 198 : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 } else { 234 } else {
198 std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin()); 235 std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin());
199 } 236 }
200 237
201 // Add the power of the echo reverb to the residual echo power. 238 // Add the power of the echo reverb to the residual echo power.
202 std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(), 239 std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(),
203 std::plus<float>()); 240 std::plus<float>());
204 } 241 }
205 242
206 } // namespace webrtc 243 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec3/residual_echo_estimator.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698