Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1718793002: Fix the gain calculation in IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@vad
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 namespace webrtc { 23 namespace webrtc {
24 24
25 namespace { 25 namespace {
26 26
27 const size_t kErbResolution = 2; 27 const size_t kErbResolution = 2;
28 const int kWindowSizeMs = 16; 28 const int kWindowSizeMs = 16;
29 const int kChunkSizeMs = 10; // Size provided by APM. 29 const int kChunkSizeMs = 10; // Size provided by APM.
30 const float kClipFreqKhz = 0.2f; 30 const float kClipFreqKhz = 0.2f;
31 const float kKbdAlpha = 1.5f; 31 const float kKbdAlpha = 1.5f;
32 const float kLambdaBot = -1.0f; // Extreme values in bisection 32 const float kLambdaBot = -1.0f; // Extreme values in bisection
33 const float kLambdaTop = -10e-18f; // search for lamda. 33 const float kLambdaTop = -1e-5; // search for lamda.
34 const float kVoiceProbabilityThreshold = 0.02; 34 const float kVoiceProbabilityThreshold = 0.02;
35 // Number of chunks after voice activity which is still considered speech. 35 // Number of chunks after voice activity which is still considered speech.
36 const size_t kSpeechOffsetDelay = 80; 36 const size_t kSpeechOffsetDelay = 80;
37 const float kDecayRate = 0.98f; // Power estimation decay rate. 37 const float kDecayRate = 0.98f; // Power estimation decay rate.
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.
39 const float kRho = 0.0004f; // Default production and interpretation SNR. 39 const float kRho = 0.0004f; // Default production and interpretation SNR.
40 40
41 // Returns dot product of vectors |a| and |b| with size |length|. 41 // Returns dot product of vectors |a| and |b| with size |length|.
42 float DotProduct(const float* a, const float* b, size_t length) { 42 float DotProduct(const float* a, const float* b, size_t length) {
43 float ret = 0.f; 43 float ret = 0.f;
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 filtered_noise_pow_.get()); 157 filtered_noise_pow_.get());
158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); 158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
159 const float power_target = 159 const float power_target =
160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f); 160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);
161 const float power_top = 161 const float power_top =
162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); 163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
164 const float power_bot = 164 const float power_bot =
165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
166 if (power_target >= power_bot && power_target <= power_top) { 166 if (power_target >= power_bot && power_target <= power_top) {
167 SolveForLambda(power_target, power_bot, power_top); 167 SolveForLambda(power_target);
168 UpdateErbGains(); 168 UpdateErbGains();
169 } // Else experiencing power underflow, so do nothing. 169 } // Else experiencing power underflow, so do nothing.
170 gain_applier_.Apply(in_block, out_block); 170 gain_applier_.Apply(in_block, out_block);
171 } 171 }
172 172
173 void IntelligibilityEnhancer::SolveForLambda(float power_target, 173 void IntelligibilityEnhancer::SolveForLambda(float power_target) {
174 float power_bot,
175 float power_top) {
176 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 174 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
177 const int kMaxIters = 100; // for these, based on experiments. 175 const int kMaxIters = 100; // for these, based on experiments.
178 176
179 const float reciprocal_power_target = 177 const float reciprocal_power_target =
180 1.f / (power_target + std::numeric_limits<float>::epsilon()); 178 1.f / (power_target + std::numeric_limits<float>::epsilon());
181 float lambda_bot = kLambdaBot; 179 float lambda_bot = kLambdaBot;
182 float lambda_top = kLambdaTop; 180 float lambda_top = kLambdaTop;
183 float power_ratio = 2.f; // Ratio of achieved power to target power. 181 float power_ratio = 2.f; // Ratio of achieved power to target power.
184 int iters = 0; 182 int iters = 0;
185 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { 183 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
186 const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f; 184 const float lambda = (lambda_bot + lambda_top) / 2.f;
187 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); 185 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
188 const float power = 186 const float power =
189 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); 187 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
190 if (power < power_target) { 188 if (power < power_target) {
191 lambda_bot = lambda; 189 lambda_bot = lambda;
192 } else { 190 } else {
193 lambda_top = lambda; 191 lambda_top = lambda;
194 } 192 }
195 power_ratio = std::fabs(power * reciprocal_power_target); 193 power_ratio = std::fabs(power * reciprocal_power_target);
196 ++iters; 194 ++iters;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
281 for (size_t j = 0; j < bank_size_; ++j) { 279 for (size_t j = 0; j < bank_size_; ++j) {
282 filter_bank[j][i] /= sum; 280 filter_bank[j][i] /= sum;
283 } 281 }
284 } 282 }
285 return filter_bank; 283 return filter_bank;
286 } 284 }
287 285
288 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 286 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
289 size_t start_freq, 287 size_t start_freq,
290 float* sols) { 288 float* sols) {
291 bool quadratic = (kRho < 1.f); 289 const float kMinPower = 1e-5;
290
292 const float* pow_x0 = filtered_clear_pow_.get(); 291 const float* pow_x0 = filtered_clear_pow_.get();
293 const float* pow_n0 = filtered_noise_pow_.get(); 292 const float* pow_n0 = filtered_noise_pow_.get();
294 293
295 for (size_t n = 0; n < start_freq; ++n) { 294 for (size_t n = 0; n < start_freq; ++n) {
296 sols[n] = 1.f; 295 sols[n] = 1.f;
297 } 296 }
298 297
299 // Analytic solution for optimal gains. See paper for derivation. 298 // Analytic solution for optimal gains. See paper for derivation.
300 for (size_t n = start_freq - 1; n < bank_size_; ++n) { 299 for (size_t n = start_freq; n < bank_size_; ++n) {
301 float alpha0, beta0, gamma0; 300 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
302 gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + 301 sols[n] = 1.f;
303 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
304 beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n];
305 if (quadratic) {
306 alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n];
307 sols[n] =
308 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) /
309 (2 * alpha0 + std::numeric_limits<float>::epsilon());
310 } else { 302 } else {
311 sols[n] = -gamma0 / beta0; 303 float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
hlundin-webrtc 2016/02/22 12:59:22 I like local consts...
aluebs-webrtc 2016/02/22 23:56:10 I like them as well, but apparently I have a hard
304 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
305 float beta0 = lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
306 float alpha0 = lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
307 if (beta0 * beta0 < 4.f * alpha0 * gamma0) {
hlundin-webrtc 2016/02/22 12:59:22 You are essentially calculating beta0 * beta0 - 4.
aluebs-webrtc 2016/02/22 23:56:11 Good point, done. Although I am not creative enoug
hlundin-webrtc 2016/02/24 09:51:00 I don't know the algorithm well enough to suggest
turaj 2016/02/24 15:26:42 If you consider my suggestion of using max(0, b^2
aluebs-webrtc 2016/02/24 23:40:48 No name is best name :)
308 sols[n] = -beta0 / (2.f * alpha0);
hlundin-webrtc 2016/02/22 12:59:22 This is not the same as the old code, right?
turaj 2016/02/22 16:05:20 My interpretation of the paper Eq 18 is that the q
aluebs-webrtc 2016/02/22 23:56:10 I agree that the quadratic equation always has rea
turaj 2016/02/24 15:26:42 Thanks for the explanation, it makes total sense,
aluebs-webrtc 2016/02/24 23:40:47 That is a great point, done.
309 } else {
310 sols[n] = (-beta0 - sqrtf(beta0 * beta0 - 4.f * alpha0 * gamma0)) /
hlundin-webrtc 2016/02/22 12:59:22 No need for regularization any longer?
aluebs-webrtc 2016/02/22 23:56:10 No, because now I check for a minimum power in lin
hlundin-webrtc 2016/02/24 09:51:00 You may want to add a DCHECK to document/verify yo
aluebs-webrtc 2016/02/24 23:40:47 Done.
311 (2.f * alpha0);
312 }
313 sols[n] = fmax(0.f, sols[n]);
312 } 314 }
313 sols[n] = fmax(0, sols[n]);
314 } 315 }
315 } 316 }
316 317
317 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { 318 bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
318 FloatToS16(audio, chunk_length_, &audio_s16_[0]); 319 FloatToS16(audio, chunk_length_, &audio_s16_[0]);
319 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); 320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);
320 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
321 chunks_since_voice_ = 0; 322 chunks_since_voice_ = 0;
322 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
323 ++chunks_since_voice_; 324 ++chunks_since_voice_;
324 } 325 }
325 return chunks_since_voice_ < kSpeechOffsetDelay; 326 return chunks_since_voice_ < kSpeechOffsetDelay;
326 } 327 }
327 328
328 } // namespace webrtc 329 } // namespace webrtc
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698