Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(318)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1207353002: Add new variance update option and unittests for intelligibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Addressed comments from hlundin Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 // 11 //
12 // Implements core class for intelligibility enhancer. 12 // Implements core class for intelligibility enhancer.
13 // 13 //
14 // Details of the model and algorithm can be found in the original paper: 14 // Details of the model and algorithm can be found in the original paper:
15 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 15 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
16 // 16 //
17 17
18 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" 18 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
19 19
20 #include <cmath> 20 #include <cmath>
21 #include <cstdlib> 21 #include <cstdlib>
22 22
23 #include <algorithm> 23 #include <algorithm>
24 #include <numeric> 24 #include <numeric>
25 25
26 #include "webrtc/base/checks.h" 26 #include "webrtc/base/checks.h"
27 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 27 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
28 #include "webrtc/common_audio/window_generator.h" 28 #include "webrtc/common_audio/window_generator.h"
29 29
30 namespace {
Andrew MacDonald 2015/07/02 02:46:48 Not a big deal, but normally this is still wrapped
ekm 2015/07/07 21:57:02 Done.
31
32 const int kErbResolution = 2;
33 const int kWindowSizeMs = 2;
34 const int kChunkSizeMs = 10; // Size provided by APM.
35 const float kClipFreq = 200.0f;
36 const float kConfigRho = 0.02f; // Default production and interpretation SNR.
37 const float kKbdAlpha = 1.5f;
38 const float kLambdaBot = -1.0; // Extreme values in bisection
39 const float kLambdaTop = -10e-18f; // search for lambda.
40
41 } // namespace
42
43 namespace webrtc {
44
30 using std::complex; 45 using std::complex;
31 using std::max; 46 using std::max;
32 using std::min; 47 using std::min;
33
34 namespace webrtc {
35
36 const int IntelligibilityEnhancer::kErbResolution = 2;
37 const int IntelligibilityEnhancer::kWindowSizeMs = 2;
38 const int IntelligibilityEnhancer::kChunkSizeMs = 10; // Size provided by APM.
39 const int IntelligibilityEnhancer::kAnalyzeRate = 800;
40 const int IntelligibilityEnhancer::kVarianceRate = 2;
41 const float IntelligibilityEnhancer::kClipFreq = 200.0f;
42 const float IntelligibilityEnhancer::kConfigRho = 0.02f;
43 const float IntelligibilityEnhancer::kKbdAlpha = 1.5f;
44
45 // To disable gain update smoothing, set gain limit to be VERY high.
46 // TODO(ekmeyerson): Add option to disable gain smoothing altogether
47 // to avoid the extra computation.
48 const float IntelligibilityEnhancer::kGainChangeLimit = 0.0125f;
49
50 using VarianceType = intelligibility::VarianceArray::StepType; 48 using VarianceType = intelligibility::VarianceArray::StepType;
51 49
52 IntelligibilityEnhancer::TransformCallback::TransformCallback( 50 IntelligibilityEnhancer::TransformCallback::TransformCallback(
53 IntelligibilityEnhancer* parent, 51 IntelligibilityEnhancer* parent,
54 IntelligibilityEnhancer::AudioSource source) 52 IntelligibilityEnhancer::AudioSource source)
55 : parent_(parent), source_(source) { 53 : parent_(parent), source_(source) {
56 } 54 }
57 55
58 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( 56 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
59 const complex<float>* const* in_block, 57 const complex<float>* const* in_block,
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
196 ProcessClearBlock(in_block, out_block); 194 ProcessClearBlock(in_block, out_block);
197 break; 195 break;
198 case kCaptureStream: 196 case kCaptureStream:
199 ProcessNoiseBlock(in_block, out_block); 197 ProcessNoiseBlock(in_block, out_block);
200 break; 198 break;
201 } 199 }
202 } 200 }
203 201
204 void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, 202 void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
205 complex<float>* out_block) { 203 complex<float>* out_block) {
206 float power_target;
207
208 if (block_count_ < 2) { 204 if (block_count_ < 2) {
209 memset(out_block, 0, freqs_ * sizeof(*out_block)); 205 memset(out_block, 0, freqs_ * sizeof(*out_block));
210 ++block_count_; 206 ++block_count_;
211 return; 207 return;
212 } 208 }
213 209
214 // For now, always assumes enhancement is necessary. 210 // For now, always assumes enhancement is necessary.
215 // TODO(ekmeyerson): Change to only enhance if necessary, 211 // TODO(ekmeyerson): Change to only enhance if necessary,
216 // based on experiments with different cutoffs. 212 // based on experiments with different cutoffs.
217 if (has_voice_low_ || true) { 213 if (has_voice_low_ || true) {
218 clear_variance_.Step(in_block, false); 214 clear_variance_.Step(in_block, false);
219 power_target = std::accumulate(clear_variance_.variance(), 215 float power_target = std::accumulate(
Andrew MacDonald 2015/07/02 02:46:48 const
ekm 2015/07/07 21:57:02 Done.
220 clear_variance_.variance() + freqs_, 0.0f); 216 clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.0f);
221 217
222 if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { 218 if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
223 AnalyzeClearBlock(power_target); 219 AnalyzeClearBlock(power_target);
224 ++analysis_step_; 220 ++analysis_step_;
225 if (analysis_step_ == variance_rate_) { 221 if (analysis_step_ == variance_rate_) {
226 analysis_step_ = 0; 222 analysis_step_ = 0;
227 clear_variance_.Clear(); 223 clear_variance_.Clear();
228 noise_variance_.Clear(); 224 noise_variance_.Clear();
229 } 225 }
230 } 226 }
231 ++block_count_; 227 ++block_count_;
232 } 228 }
233 229
234 /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */ 230 /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */
235 gain_applier_.Apply(in_block, out_block); 231 gain_applier_.Apply(in_block, out_block);
236 } 232 }
237 233
238 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { 234 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
239 FilterVariance(clear_variance_.variance(), filtered_clear_var_.get()); 235 FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());
240 FilterVariance(noise_variance_.variance(), filtered_noise_var_.get()); 236 FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());
241 237
242 // Bisection search for optimal |lambda| 238 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
239 float power_top =
Andrew MacDonald 2015/07/02 02:46:48 const
ekm 2015/07/07 21:57:02 Done.
240 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
241 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
242 float power_bot =
Andrew MacDonald 2015/07/02 02:46:48 const
ekm 2015/07/07 21:57:02 Done.
243 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
244 if (power_target >= power_bot && power_target <= power_top) {
245 SolveForLambda(power_target, power_bot, power_top);
246 UpdateErbGains();
247 } // Else experiencing variance underflow, so do nothing.
248 }
243 249
244 float lambda_bot = -1.0f, lambda_top = -10e-18f, lambda; 250 void IntelligibilityEnhancer::SolveForLambda(float power_target,
245 float power_bot, power_top, power; 251 float power_bot,
246 SolveForGainsGivenLambda(lambda_top, start_freq_, gains_eq_.get()); 252 float power_top) {
247 power_top = 253 float lambda_bot = kLambdaBot;
248 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); 254 float lambda_top = kLambdaTop;
249 SolveForGainsGivenLambda(lambda_bot, start_freq_, gains_eq_.get());
250 power_bot =
251 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
252 DCHECK(power_target >= power_bot && power_target <= power_top);
253
254 float power_ratio = 2.0f; // Ratio of achieved power to target power. 255 float power_ratio = 2.0f; // Ratio of achieved power to target power.
255 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 256 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
256 const int kMaxIters = 100; // for these, based on experiments. 257 const int kMaxIters = 100; // for these, based on experiments.
257 int iters = 0; 258 int iters = 0;
258 while (fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters) { 259 while (fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters) {
259 lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f; 260 float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
Andrew MacDonald 2015/07/02 02:46:48 const
ekm 2015/07/07 21:57:02 Done.
260 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); 261 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
261 power = DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); 262 float power =
Andrew MacDonald 2015/07/02 02:46:47 const
ekm 2015/07/07 21:57:02 Done.
263 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
262 if (power < power_target) { 264 if (power < power_target) {
263 lambda_bot = lambda; 265 lambda_bot = lambda;
264 } else { 266 } else {
265 lambda_top = lambda; 267 lambda_top = lambda;
266 } 268 }
267 power_ratio = fabs(power / power_target); 269 power_ratio = fabs(power / power_target);
268 ++iters; 270 ++iters;
269 } 271 }
272 }
270 273
274 void IntelligibilityEnhancer::UpdateErbGains() {
271 // (ERB gain) = filterbank' * (freq gain) 275 // (ERB gain) = filterbank' * (freq gain)
272 float* gains = gain_applier_.target(); 276 float* gains = gain_applier_.target();
273 for (int i = 0; i < freqs_; ++i) { 277 for (int i = 0; i < freqs_; ++i) {
274 gains[i] = 0.0f; 278 gains[i] = 0.0f;
275 for (int j = 0; j < bank_size_; ++j) { 279 for (int j = 0; j < bank_size_; ++j) {
276 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); 280 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
277 } 281 }
278 } 282 }
279 } 283 }
280 284
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 int length) { 401 int length) {
398 float ret = 0.0f; 402 float ret = 0.0f;
399 403
400 for (int i = 0; i < length; ++i) { 404 for (int i = 0; i < length; ++i) {
401 ret = fmaf(a[i], b[i], ret); 405 ret = fmaf(a[i], b[i], ret);
402 } 406 }
403 return ret; 407 return ret;
404 } 408 }
405 409
406 } // namespace webrtc 410 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698