Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(939)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1821443003: Fix normalization of noise estimate in NoiseSuppressor (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Normalize dynamically Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 22
23 namespace webrtc { 23 namespace webrtc {
24 24
25 namespace { 25 namespace {
26 26
27 const size_t kErbResolution = 2; 27 const size_t kErbResolution = 2;
28 const int kWindowSizeMs = 16; 28 const int kWindowSizeMs = 16;
29 const int kChunkSizeMs = 10; // Size provided by APM. 29 const int kChunkSizeMs = 10; // Size provided by APM.
30 const float kClipFreqKhz = 0.2f; 30 const float kClipFreqKhz = 0.2f;
31 const float kKbdAlpha = 1.5f; 31 const float kKbdAlpha = 1.5f;
32 const float kLambdaBot = -1.0f; // Extreme values in bisection 32 const double kLambdaBot = -1.0 / (1 << 30); // Extreme values in bisection
turaj 2016/03/30 14:51:55 This is relatively large change which scales the
aluebs-webrtc 2016/03/31 00:26:32 Yes, it needed to be normalized by the new noise s
33 const float kLambdaTop = -1e-5f; // search for lambda. 33 const double kLambdaTop = -1e-5 / (1 << 30); // search for lambda.
34 const float kVoiceProbabilityThreshold = 0.02f; 34 const float kVoiceProbabilityThreshold = 0.02f;
35 // Number of chunks after voice activity which is still considered speech. 35 // Number of chunks after voice activity which is still considered speech.
36 const size_t kSpeechOffsetDelay = 80; 36 const size_t kSpeechOffsetDelay = 80;
37 const float kDecayRate = 0.98f; // Power estimation decay rate. 37 const float kDecayRate = 0.98f; // Power estimation decay rate.
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.
39 const float kRho = 0.0004f; // Default production and interpretation SNR. 39 const float kRho = 0.0004f; // Default production and interpretation SNR.
40 40
41 // Returns dot product of vectors |a| and |b| with size |length|. 41 // Returns dot product of vectors |a| and |b| with size |length|.
42 float DotProduct(const float* a, const float* b, size_t length) { 42 float DotProduct(const float* a, const float* b, size_t length) {
43 float ret = 0.f; 43 float ret = 0.f;
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
155 gain_applier_.Apply(in_block[i], out_block[i]); 155 gain_applier_.Apply(in_block[i], out_block[i]);
156 } 156 }
157 } 157 }
158 158
159 void IntelligibilityEnhancer::SolveForLambda(float power_target) { 159 void IntelligibilityEnhancer::SolveForLambda(float power_target) {
160 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 160 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
161 const int kMaxIters = 100; // for these, based on experiments. 161 const int kMaxIters = 100; // for these, based on experiments.
162 162
163 const float reciprocal_power_target = 163 const float reciprocal_power_target =
164 1.f / (power_target + std::numeric_limits<float>::epsilon()); 164 1.f / (power_target + std::numeric_limits<float>::epsilon());
165 float lambda_bot = kLambdaBot; 165 double lambda_bot = kLambdaBot;
166 float lambda_top = kLambdaTop; 166 double lambda_top = kLambdaTop;
167 float power_ratio = 2.f; // Ratio of achieved power to target power. 167 float power_ratio = 2.f; // Ratio of achieved power to target power.
168 int iters = 0; 168 int iters = 0;
169 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { 169 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
170 const float lambda = (lambda_bot + lambda_top) / 2.f; 170 const double lambda = (lambda_bot + lambda_top) / 2.0;
peah-webrtc 2016/03/30 13:44:45 lambda is always in between lambda_bot and lambda_t
aluebs-webrtc 2016/03/31 00:26:32 Good point, although I was trying to keep consiste
171 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); 171 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
172 const float power = 172 const float power =
173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
174 if (power < power_target) { 174 if (power < power_target) {
175 lambda_bot = lambda; 175 lambda_bot = lambda;
176 } else { 176 } else {
177 lambda_top = lambda; 177 lambda_top = lambda;
178 } 178 }
179 power_ratio = std::fabs(power * reciprocal_power_target); 179 power_ratio = std::fabs(power * reciprocal_power_target);
180 ++iters; 180 ++iters;
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 for (size_t j = 0; j < bank_size_; ++j) { 260 for (size_t j = 0; j < bank_size_; ++j) {
261 sum += filter_bank[j][i]; 261 sum += filter_bank[j][i];
262 } 262 }
263 for (size_t j = 0; j < bank_size_; ++j) { 263 for (size_t j = 0; j < bank_size_; ++j) {
264 filter_bank[j][i] /= sum; 264 filter_bank[j][i] /= sum;
265 } 265 }
266 } 266 }
267 return filter_bank; 267 return filter_bank;
268 } 268 }
269 269
270 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 270 void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
271 size_t start_freq, 271 size_t start_freq,
272 float* sols) { 272 float* sols) {
273 const float kMinPower = 1e-5f; 273 const float kMinPower = 1e-5f;
274 274
275 const float* pow_x0 = filtered_clear_pow_.data(); 275 const float* pow_x0 = filtered_clear_pow_.data();
276 const float* pow_n0 = filtered_noise_pow_.data(); 276 const float* pow_n0 = filtered_noise_pow_.data();
277 277
278 for (size_t n = 0; n < start_freq; ++n) { 278 for (size_t n = 0; n < start_freq; ++n) {
279 sols[n] = 1.f; 279 sols[n] = 1.f;
280 } 280 }
281 281
282 // Analytic solution for optimal gains. See paper for derivation. 282 // Analytic solution for optimal gains. See paper for derivation.
283 for (size_t n = start_freq; n < bank_size_; ++n) { 283 for (size_t n = start_freq; n < bank_size_; ++n) {
284 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { 284 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
285 sols[n] = 1.f; 285 sols[n] = 1.f;
286 } else { 286 } else {
287 const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + 287 const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] +
peah-webrtc 2016/03/30 13:44:46 I cannot see from these equations what difference t
aluebs-webrtc 2016/03/31 00:26:32 Basically because each alpha0, beta0 or gamma0 has
288 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; 288 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
289 const float beta0 = 289 const double beta0 =
290 lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; 290 lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n];
291 const float alpha0 = 291 const double alpha0 =
292 lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; 292 lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n];
293 RTC_DCHECK_LT(alpha0, 0.f); 293 RTC_DCHECK_LT(alpha0, 0.0);
294 // The quadratic equation should always have real roots, but to guard 294 // The quadratic equation should always have real roots, but to guard
295 // against numerical errors we limit it to a minimum of zero. 295 // against numerical errors we limit it to a minimum of zero.
296 sols[n] = std::max( 296 sols[n] = std::max(
297 0.f, (-beta0 - std::sqrt(std::max( 297 0.0, (-beta0 - std::sqrt(std::max(
298 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) / 298 0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) /
299 (2.f * alpha0)); 299 (2.0 * alpha0));
300 } 300 }
301 } 301 }
302 } 302 }
303 303
304 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { 304 bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
305 FloatToS16(audio, chunk_length_, audio_s16_.data()); 305 FloatToS16(audio, chunk_length_, audio_s16_.data());
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
308 chunks_since_voice_ = 0; 308 chunks_since_voice_ = 0;
309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
310 ++chunks_since_voice_; 310 ++chunks_since_voice_;
311 } 311 }
312 return chunks_since_voice_ < kSpeechOffsetDelay; 312 return chunks_since_voice_ < kSpeechOffsetDelay;
313 } 313 }
314 314
315 } // namespace webrtc 315 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698