webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1207353002: Add new variance update option and unittests for intelligibility

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1207353002: Add new variance update option and unittests for intelligibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 24 matching lines...) Expand all Loading...
35	35

36 const int IntelligibilityEnhancer::kErbResolution = 2;	36 const int IntelligibilityEnhancer::kErbResolution = 2;

37 const int IntelligibilityEnhancer::kWindowSizeMs = 2;	37 const int IntelligibilityEnhancer::kWindowSizeMs = 2;

38 const int IntelligibilityEnhancer::kChunkSizeMs = 10; // Size provided by APM.	38 const int IntelligibilityEnhancer::kChunkSizeMs = 10; // Size provided by APM.

39 const int IntelligibilityEnhancer::kAnalyzeRate = 800;	39 const int IntelligibilityEnhancer::kAnalyzeRate = 800;

40 const int IntelligibilityEnhancer::kVarianceRate = 2;	40 const int IntelligibilityEnhancer::kVarianceRate = 2;

41 const float IntelligibilityEnhancer::kClipFreq = 200.0f;	41 const float IntelligibilityEnhancer::kClipFreq = 200.0f;

42 const float IntelligibilityEnhancer::kConfigRho = 0.02f;	42 const float IntelligibilityEnhancer::kConfigRho = 0.02f;

43 const float IntelligibilityEnhancer::kKbdAlpha = 1.5f;	43 const float IntelligibilityEnhancer::kKbdAlpha = 1.5f;

44	44

	45 const float IntelligibilityEnhancer::kLambdaBot = -1.0;

	46 const float IntelligibilityEnhancer::kLambdaTop = -10e-18f;

	47

45 // To disable gain update smoothing, set gain limit to be VERY high.	48 // To disable gain update smoothing, set gain limit to be VERY high.

46 // TODO(ekmeyerson): Add option to disable gain smoothing altogether	49 // TODO(ekmeyerson): Add option to disable gain smoothing altogether

47 // to avoid the extra computation.	50 // to avoid the extra computation.

48 const float IntelligibilityEnhancer::kGainChangeLimit = 0.0125f;	51 const float IntelligibilityEnhancer::kGainChangeLimit = 0.0125f;

49	52

50 using VarianceType = intelligibility::VarianceArray::StepType;	53 using VarianceType = intelligibility::VarianceArray::StepType;

51	54

52 IntelligibilityEnhancer::TransformCallback::TransformCallback(	55 IntelligibilityEnhancer::TransformCallback::TransformCallback(

53 IntelligibilityEnhancer* parent,	56 IntelligibilityEnhancer* parent,

54 IntelligibilityEnhancer::AudioSource source)	57 IntelligibilityEnhancer::AudioSource source)

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
232 }	235 }

233	236

234 /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */	237 /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */

235 gain_applier_.Apply(in_block, out_block);	238 gain_applier_.Apply(in_block, out_block);

236 }	239 }

237	240

238 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {	241 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {

239 FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());	242 FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());

240 FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());	243 FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());

241	244

242 // Bisection search for optimal \|lambda\|	245 float power_bot, power_top;

243	246 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());

244 float lambda_bot = -1.0f, lambda_top = -10e-18f, lambda;

245 float power_bot, power_top, power;

246 SolveForGainsGivenLambda(lambda_top, start_freq_, gains_eq_.get());

247 power_top =	247 power_top =

248 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);	248 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);

249 SolveForGainsGivenLambda(lambda_bot, start_freq_, gains_eq_.get());	249 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());

250 power_bot =	250 power_bot =

251 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);	251 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);

252 DCHECK(power_target >= power_bot && power_target <= power_top);	252 if(power_target >= power_bot && power_target <= power_top) {

	253 SolveForLambda(power_target, power_bot, power_top);

	254 } else {

	255 // Experiencing underflow; no speech; does not modify gains.

	256 for (int i = 0; i < freqs_; ++i) {

	257 gains_eq_[i] = 1.0f;
	turaj 2015/06/26 00:32:57 Does this really mean that we are not changing gains? Does this really mean that we are not changing gains? What if we do nothing? Then we will have the same \|gains_eq_\| from the past and UpdateErbGains() will result in the same gains. If that is correct, we don't even need to run UpdateErbGains() either. ekm 2015/06/26 19:07:09 Done. You're right. I was thinking that if the pre Show quoted text On 2015/06/26 00:32:57, turaj wrote: > Does this really mean that we are not changing gains? What if we do nothing? Then > we will have the same \|gains_eq_\| from the past and UpdateErbGains() will result in > the same gains. If that is correct, we don't even need to run UpdateErbGains() > either. Done. You're right. I was thinking that if the previous \|gains_eq_\| were high then it would be strange to boost a ~0 variance signal, but it results in ~0 signal either way, and this saves computation. When speech eventually comes back on, the previous \|gains_eq_\| will still apply for a window, but that should be ok.
	258 }

	259 }

	260 UpdateErbGains();

	261 }

253	262

	263 void IntelligibilityEnhancer::SolveForLambda(float power_target,

	264 float power_bot,

	265 float power_top) {

	266 float lambda_bot = kLambdaBot;

	267 float lambda_top = kLambdaTop;

	268 float lambda, power;

254 float power_ratio = 2.0f; // Ratio of achieved power to target power.	269 float power_ratio = 2.0f; // Ratio of achieved power to target power.

255 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values	270 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values

256 const int kMaxIters = 100; // for these, based on experiments.	271 const int kMaxIters = 100; // for these, based on experiments.

257 int iters = 0;	272 int iters = 0;

258 while (fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters) {	273 while (fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters) {

259 lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;	274 lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;

260 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());	275 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());

261 power = DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);	276 power = DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);

262 if (power < power_target) {	277 if (power < power_target) {

263 lambda_bot = lambda;	278 lambda_bot = lambda;

264 } else {	279 } else {

265 lambda_top = lambda;	280 lambda_top = lambda;

266 }	281 }

267 power_ratio = fabs(power / power_target);	282 power_ratio = fabs(power / power_target);

268 ++iters;	283 ++iters;

269 }	284 }

	285 }

270	286

	287 void IntelligibilityEnhancer::UpdateErbGains() {

271 // (ERB gain) = filterbank' * (freq gain)	288 // (ERB gain) = filterbank' * (freq gain)

272 float* gains = gain_applier_.target();	289 float* gains = gain_applier_.target();

273 for (int i = 0; i < freqs_; ++i) {	290 for (int i = 0; i < freqs_; ++i) {

274 gains[i] = 0.0f;	291 gains[i] = 0.0f;

275 for (int j = 0; j < bank_size_; ++j) {	292 for (int j = 0; j < bank_size_; ++j) {

276 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);	293 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);

277 }	294 }

278 }	295 }

279 }	296 }

280	297

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
397 int length) {	414 int length) {

398 float ret = 0.0f;	415 float ret = 0.0f;

399	416

400 for (int i = 0; i < length; ++i) {	417 for (int i = 0; i < length; ++i) {

401 ret = fmaf(a[i], b[i], ret);	418 ret = fmaf(a[i], b[i], ret);

402 }	419 }

403 return ret;	420 return ret;

404 }	421 }

405	422

406 } // namespace webrtc	423 } // namespace webrtc

OLD	NEW