Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(320)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1718793002: Fix the gain calculation in IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@vad
Patch Set: Make windows happy Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 namespace webrtc { 23 namespace webrtc {
24 24
25 namespace { 25 namespace {
26 26
27 const size_t kErbResolution = 2; 27 const size_t kErbResolution = 2;
28 const int kWindowSizeMs = 16; 28 const int kWindowSizeMs = 16;
29 const int kChunkSizeMs = 10; // Size provided by APM. 29 const int kChunkSizeMs = 10; // Size provided by APM.
30 const float kClipFreqKhz = 0.2f; 30 const float kClipFreqKhz = 0.2f;
31 const float kKbdAlpha = 1.5f; 31 const float kKbdAlpha = 1.5f;
32 const float kLambdaBot = -1.0f; // Extreme values in bisection 32 const float kLambdaBot = -1.0f; // Extreme values in bisection
33 const float kLambdaTop = -10e-18f; // search for lambda. 33 const float kLambdaTop = -1e-5f; // search for lambda.
34 const float kVoiceProbabilityThreshold = 0.02f; 34 const float kVoiceProbabilityThreshold = 0.02f;
35 // Number of chunks after voice activity which is still considered speech. 35 // Number of chunks after voice activity which is still considered speech.
36 const size_t kSpeechOffsetDelay = 80; 36 const size_t kSpeechOffsetDelay = 80;
37 const float kDecayRate = 0.98f; // Power estimation decay rate. 37 const float kDecayRate = 0.98f; // Power estimation decay rate.
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.
39 const float kRho = 0.0004f; // Default production and interpretation SNR. 39 const float kRho = 0.0004f; // Default production and interpretation SNR.
40 40
41 // Returns dot product of vectors |a| and |b| with size |length|. 41 // Returns dot product of vectors |a| and |b| with size |length|.
42 float DotProduct(const float* a, const float* b, size_t length) { 42 float DotProduct(const float* a, const float* b, size_t length) {
43 float ret = 0.f; 43 float ret = 0.f;
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 filtered_noise_pow_.get()); 157 filtered_noise_pow_.get());
158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); 158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
159 const float power_target = 159 const float power_target =
160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f); 160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);
161 const float power_top = 161 const float power_top =
162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); 163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
164 const float power_bot = 164 const float power_bot =
165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
166 if (power_target >= power_bot && power_target <= power_top) { 166 if (power_target >= power_bot && power_target <= power_top) {
167 SolveForLambda(power_target, power_bot, power_top); 167 SolveForLambda(power_target);
168 UpdateErbGains(); 168 UpdateErbGains();
169 } // Else experiencing power underflow, so do nothing. 169 } // Else experiencing power underflow, so do nothing.
170 gain_applier_.Apply(in_block, out_block); 170 gain_applier_.Apply(in_block, out_block);
171 } 171 }
172 172
173 void IntelligibilityEnhancer::SolveForLambda(float power_target, 173 void IntelligibilityEnhancer::SolveForLambda(float power_target) {
174 float power_bot,
175 float power_top) {
176 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 174 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
177 const int kMaxIters = 100; // for these, based on experiments. 175 const int kMaxIters = 100; // for these, based on experiments.
178 176
179 const float reciprocal_power_target = 177 const float reciprocal_power_target =
180 1.f / (power_target + std::numeric_limits<float>::epsilon()); 178 1.f / (power_target + std::numeric_limits<float>::epsilon());
181 float lambda_bot = kLambdaBot; 179 float lambda_bot = kLambdaBot;
182 float lambda_top = kLambdaTop; 180 float lambda_top = kLambdaTop;
183 float power_ratio = 2.f; // Ratio of achieved power to target power. 181 float power_ratio = 2.f; // Ratio of achieved power to target power.
184 int iters = 0; 182 int iters = 0;
185 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { 183 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
186 const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f; 184 const float lambda = (lambda_bot + lambda_top) / 2.f;
187 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); 185 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
188 const float power = 186 const float power =
189 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 187 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
190 if (power < power_target) { 188 if (power < power_target) {
191 lambda_bot = lambda; 189 lambda_bot = lambda;
192 } else { 190 } else {
193 lambda_top = lambda; 191 lambda_top = lambda;
194 } 192 }
195 power_ratio = std::fabs(power * reciprocal_power_target); 193 power_ratio = std::fabs(power * reciprocal_power_target);
196 ++iters; 194 ++iters;
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 for (size_t j = 0; j < bank_size_; ++j) { 277 for (size_t j = 0; j < bank_size_; ++j) {
280 filter_bank[j][i] /= sum; 278 filter_bank[j][i] /= sum;
281 } 279 }
282 } 280 }
283 return filter_bank; 281 return filter_bank;
284 } 282 }
285 283
286 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 284 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
287 size_t start_freq, 285 size_t start_freq,
288 float* sols) { 286 float* sols) {
289 bool quadratic = (kRho < 1.f); 287 const float kMinPower = 1e-5f;
288
290 const float* pow_x0 = filtered_clear_pow_.get(); 289 const float* pow_x0 = filtered_clear_pow_.get();
291 const float* pow_n0 = filtered_noise_pow_.get(); 290 const float* pow_n0 = filtered_noise_pow_.get();
292 291
293 for (size_t n = 0; n < start_freq; ++n) { 292 for (size_t n = 0; n < start_freq; ++n) {
294 sols[n] = 1.f; 293 sols[n] = 1.f;
295 } 294 }
296 295
297 // Analytic solution for optimal gains. See paper for derivation. 296 // Analytic solution for optimal gains. See paper for derivation.
298 for (size_t n = start_freq - 1; n < bank_size_; ++n) { 297 for (size_t n = start_freq; n < bank_size_; ++n) {
299 float alpha0, beta0, gamma0; 298 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
300 gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + 299 sols[n] = 1.f;
301 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
302 beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n];
303 if (quadratic) {
304 alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n];
305 sols[n] =
306 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) /
307 (2 * alpha0 + std::numeric_limits<float>::epsilon());
308 } else { 300 } else {
309 sols[n] = -gamma0 / beta0; 301 const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
302 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
303 const float beta0 =
304 lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
305 const float alpha0 =
306 lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
307 RTC_DCHECK_LT(alpha0, 0.f);
308 // The quadratic equation should always have real roots, but to guard
309 // against numerical errors we limit it to a minimum of zero.
310 sols[n] = std::max(
311 0.f, (-beta0 - std::sqrt(std::max(
312 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
313 (2.f * alpha0));
310 } 314 }
311 sols[n] = fmax(0, sols[n]);
312 } 315 }
313 } 316 }
314 317
315 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { 318 bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
316 FloatToS16(audio, chunk_length_, &audio_s16_[0]); 319 FloatToS16(audio, chunk_length_, &audio_s16_[0]);
317 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); 320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);
318 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
319 chunks_since_voice_ = 0; 322 chunks_since_voice_ = 0;
320 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
321 ++chunks_since_voice_; 324 ++chunks_since_voice_;
322 } 325 }
323 return chunks_since_voice_ < kSpeechOffsetDelay; 326 return chunks_since_voice_ < kSpeechOffsetDelay;
324 } 327 }
325 328
326 } // namespace webrtc 329 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698