webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1718793002: Fix the gain calculation in IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1718793002: Fix the gain calculation in IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@vad

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 12 matching lines...) Expand all Loading...
23 namespace webrtc {	23 namespace webrtc {

24	24

25 namespace {	25 namespace {

26	26

27 const size_t kErbResolution = 2;	27 const size_t kErbResolution = 2;

28 const int kWindowSizeMs = 16;	28 const int kWindowSizeMs = 16;

29 const int kChunkSizeMs = 10; // Size provided by APM.	29 const int kChunkSizeMs = 10; // Size provided by APM.

30 const float kClipFreqKhz = 0.2f;	30 const float kClipFreqKhz = 0.2f;

31 const float kKbdAlpha = 1.5f;	31 const float kKbdAlpha = 1.5f;

32 const float kLambdaBot = -1.0f; // Extreme values in bisection	32 const float kLambdaBot = -1.0f; // Extreme values in bisection

33 const float kLambdaTop = -10e-18f; // search for lamda.	33 const float kLambdaTop = -1e-5; // search for lamda.

34 const float kVoiceProbabilityThreshold = 0.02;	34 const float kVoiceProbabilityThreshold = 0.02;

35 // Number of chunks after voice activity which is still considered speech.	35 // Number of chunks after voice activity which is still considered speech.

36 const size_t kSpeechOffsetDelay = 80;	36 const size_t kSpeechOffsetDelay = 80;

37 const float kDecayRate = 0.98f; // Power estimation decay rate.	37 const float kDecayRate = 0.98f; // Power estimation decay rate.

38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.	38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.

39 const float kRho = 0.0004f; // Default production and interpretation SNR.	39 const float kRho = 0.0004f; // Default production and interpretation SNR.

40	40

41 // Returns dot product of vectors |a| and |b| with size |length|.	41 // Returns dot product of vectors |a| and |b| with size |length|.

42 float DotProduct(const float* a, const float* b, size_t length) {	42 float DotProduct(const float* a, const float* b, size_t length) {

43 float ret = 0.f;	43 float ret = 0.f;

(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
157 filtered_noise_pow_.get());	157 filtered_noise_pow_.get());

158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());	158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());

159 const float power_target =	159 const float power_target =

160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);	160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);

161 const float power_top =	161 const float power_top =

162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);	162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);

163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());	163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());

164 const float power_bot =	164 const float power_bot =

165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);	165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);

166 if (power_target >= power_bot && power_target <= power_top) {	166 if (power_target >= power_bot && power_target <= power_top) {

167 SolveForLambda(power_target, power_bot, power_top);	167 SolveForLambda(power_target);

168 UpdateErbGains();	168 UpdateErbGains();

169 } // Else experiencing power underflow, so do nothing.	169 } // Else experiencing power underflow, so do nothing.

170 gain_applier_.Apply(in_block, out_block);	170 gain_applier_.Apply(in_block, out_block);

171 }	171 }

172	172

173 void IntelligibilityEnhancer::SolveForLambda(float power_target,	173 void IntelligibilityEnhancer::SolveForLambda(float power_target) {

174 float power_bot,

175 float power_top) {

176 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values	174 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values

177 const int kMaxIters = 100; // for these, based on experiments.	175 const int kMaxIters = 100; // for these, based on experiments.

178	176

179 const float reciprocal_power_target =	177 const float reciprocal_power_target =

180 1.f / (power_target + std::numeric_limits<float>::epsilon());	178 1.f / (power_target + std::numeric_limits<float>::epsilon());

181 float lambda_bot = kLambdaBot;	179 float lambda_bot = kLambdaBot;

182 float lambda_top = kLambdaTop;	180 float lambda_top = kLambdaTop;

183 float power_ratio = 2.f; // Ratio of achieved power to target power.	181 float power_ratio = 2.f; // Ratio of achieved power to target power.

184 int iters = 0;	182 int iters = 0;

185 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {	183 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {

186 const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f;	184 const float lambda = (lambda_bot + lambda_top) / 2.f;

187 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());	185 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());

188 const float power =	186 const float power =

189 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);	187 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);

190 if (power < power_target) {	188 if (power < power_target) {

191 lambda_bot = lambda;	189 lambda_bot = lambda;

192 } else {	190 } else {

193 lambda_top = lambda;	191 lambda_top = lambda;

194 }	192 }

195 power_ratio = std::fabs(power * reciprocal_power_target);	193 power_ratio = std::fabs(power * reciprocal_power_target);

196 ++iters;	194 ++iters;

(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
281 for (size_t j = 0; j < bank_size_; ++j) {	279 for (size_t j = 0; j < bank_size_; ++j) {

282 filter_bank[j][i] /= sum;	280 filter_bank[j][i] /= sum;

283 }	281 }

284 }	282 }

285 return filter_bank;	283 return filter_bank;

286 }	284 }

287	285

288 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,	286 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,

289 size_t start_freq,	287 size_t start_freq,

290 float* sols) {	288 float* sols) {

291 bool quadratic = (kRho < 1.f);	289 const float kMinPower = 1e-5;

	290

292 const float* pow_x0 = filtered_clear_pow_.get();	291 const float* pow_x0 = filtered_clear_pow_.get();

293 const float* pow_n0 = filtered_noise_pow_.get();	292 const float* pow_n0 = filtered_noise_pow_.get();

294	293

295 for (size_t n = 0; n < start_freq; ++n) {	294 for (size_t n = 0; n < start_freq; ++n) {

296 sols[n] = 1.f;	295 sols[n] = 1.f;

297 }	296 }

298	297

299 // Analytic solution for optimal gains. See paper for derivation.	298 // Analytic solution for optimal gains. See paper for derivation.

300 for (size_t n = start_freq - 1; n < bank_size_; ++n) {	299 for (size_t n = start_freq; n < bank_size_; ++n) {

301 float alpha0, beta0, gamma0;	300 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {

302 gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +	301 sols[n] = 1.f;

303 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];

304 beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n];

305 if (quadratic) {

306 alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n];

307 sols[n] =

308 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) /

309 (2 * alpha0 + std::numeric_limits<float>::epsilon());

310 } else {	302 } else {

311 sols[n] = -gamma0 / beta0;	303 float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
	hlundin-webrtc 2016/02/22 12:59:22: I like local consts...
	aluebs-webrtc 2016/02/22 23:56:10 (in reply to hlundin-webrtc, 2016/02/22 12:59:22): I like them as well, but apparently I have a hard time remembering them :) Added the const.
	304 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];

	305 float beta0 = lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];

	306 float alpha0 = lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];

	307 if (beta0 * beta0 < 4.f * alpha0 * gamma0) {
	hlundin-webrtc 2016/02/22 12:59:22: You are essentially calculating beta0 * beta0 - 4.f * alpha0 * gamma0 twice. Why not store it?
	    const float some_good_name = beta0 * beta0 - 4.f * alpha0 * gamma0;
	    if (some_good_name < 0) {
	      ...
	    } else {
	      sols[n] = ... sqrt(some_good_name) ...
	aluebs-webrtc 2016/02/22 23:56:11 (in reply to hlundin-webrtc, 2016/02/22 12:59:22): Good point, done. Although I am not creative enough to find a good name. I called it zero distance, although the actual distance is after taking the square root and dividing by 2a. Any better ideas?
	hlundin-webrtc 2016/02/24 09:51:00 (in reply to aluebs-webrtc, 2016/02/22 23:56:11): I don't know the algorithm well enough to suggest anything better. But if the distance is obtained after sqrt, then maybe squared_distance is an option? I'll leave it up to you; I'm fine with zero_distance, too.
	turaj 2016/02/24 15:26:42 (in reply to hlundin-webrtc, 2016/02/24 09:51:00): If you consider my suggestion of using max(0, b^2 - 4ac) there will be no need for an extra variable.
	aluebs-webrtc 2016/02/24 23:40:48 (in reply to turaj, 2016/02/24 15:26:42): No name is best name :)
	308 sols[n] = -beta0 / (2.f * alpha0);
	hlundin-webrtc 2016/02/22 12:59:22: This is not the same as the old code, right?
	turaj 2016/02/22 16:05:20: My interpretation of the paper's Eq. 18 is that the quadratic equation always has real roots. How is -beta/(2 * alpha) derived? The old code considers the non-quadratic case for rho=1, which results in alpha being zero. Perhaps I'm missing some point of the paper.
	aluebs-webrtc 2016/02/22 23:56:10 (in reply to turaj, 2016/02/22 16:05:20): I agree that the quadratic equation always has real roots, but here I try to guard against numerical errors. So when this value is slightly negative I just assume it is zero, therefore -b/2a. Does that make sense?
	turaj 2016/02/24 15:26:42 (in reply to aluebs-webrtc, 2016/02/22 23:56:10): Thanks for the explanation, it makes total sense. If I may suggest, it might be easier to see this if you write
	    sols[n] = (-beta0 - sqrt(std::max(0, beta0^2 - 4 * alpha0 * gamma0))
	with a comment as you explained.
	aluebs-webrtc 2016/02/24 23:40:47 (in reply to turaj, 2016/02/24 15:26:42): That is a great point, done.
	309 } else {

	310 sols[n] = (-beta0 - sqrtf(beta0 * beta0 - 4.f * alpha0 * gamma0)) /
	hlundin-webrtc 2016/02/22 12:59:22: No need for regularization any longer?
	aluebs-webrtc 2016/02/22 23:56:10 (in reply to hlundin-webrtc, 2016/02/22 12:59:22): No, because now I check for a minimum power in line 300, which (knowing 0<kRho<1 and lambda<0) ensures alpha0>0. I only added this regularization for the zero-division error, but I found afterwards that epsilon was probably too large for this, since it changed the results. [Editor's note: with lambda<0, 0<kRho<1 and pow_x0[n]>=kMinPower, alpha0 as computed on new line 306 is strictly negative rather than positive; the relevant guarantee for the division is that alpha0 is non-zero.]
	hlundin-webrtc 2016/02/24 09:51:00 (in reply to aluebs-webrtc, 2016/02/22 23:56:10): You may want to add a DCHECK to document/verify your assumptions.
	aluebs-webrtc 2016/02/24 23:40:47 (in reply to hlundin-webrtc, 2016/02/24 09:51:00): Done.
	311 (2.f * alpha0);

	312 }

	313 sols[n] = fmax(0.f, sols[n]);

312 }	314 }

313 sols[n] = fmax(0, sols[n]);

314 }	315 }

315 }	316 }

316	317

317 bool IntelligibilityEnhancer::IsSpeech(const float* audio) {	318 bool IntelligibilityEnhancer::IsSpeech(const float* audio) {

318 FloatToS16(audio, chunk_length_, &audio_s16_[0]);	319 FloatToS16(audio, chunk_length_, &audio_s16_[0]);

319 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);	320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);

320 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

321 chunks_since_voice_ = 0;	322 chunks_since_voice_ = 0;

322 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

323 ++chunks_since_voice_;	324 ++chunks_since_voice_;

324 }	325 }

325 return chunks_since_voice_ < kSpeechOffsetDelay;	326 return chunks_since_voice_ < kSpeechOffsetDelay;

326 }	327 }

327	328

328 } // namespace webrtc	329 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »