Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master
Patch Set: Resync Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 #include <numeric> 23 #include <numeric>
24 24
25 #include "webrtc/base/checks.h" 25 #include "webrtc/base/checks.h"
26 #include "webrtc/common_audio/include/audio_util.h" 26 #include "webrtc/common_audio/include/audio_util.h"
27 #include "webrtc/common_audio/window_generator.h" 27 #include "webrtc/common_audio/window_generator.h"
28 28
29 namespace webrtc { 29 namespace webrtc {
30 30
31 namespace { 31 namespace {
32 32
33 const int kErbResolution = 2; 33 const size_t kErbResolution = 2;
34 const int kWindowSizeMs = 2; 34 const int kWindowSizeMs = 2;
35 const int kChunkSizeMs = 10; // Size provided by APM. 35 const int kChunkSizeMs = 10; // Size provided by APM.
36 const float kClipFreq = 200.0f; 36 const float kClipFreq = 200.0f;
37 const float kConfigRho = 0.02f; // Default production and interpretation SNR. 37 const float kConfigRho = 0.02f; // Default production and interpretation SNR.
38 const float kKbdAlpha = 1.5f; 38 const float kKbdAlpha = 1.5f;
39 const float kLambdaBot = -1.0f; // Extreme values in bisection 39 const float kLambdaBot = -1.0f; // Extreme values in bisection
40 const float kLambdaTop = -10e-18f; // search for lambda. 40 const float kLambdaTop = -10e-18f; // search for lambda.
41 41
42 } // namespace 42 } // namespace
43 43
44 using std::complex; 44 using std::complex;
45 using std::max; 45 using std::max;
46 using std::min; 46 using std::min;
47 using VarianceType = intelligibility::VarianceArray::StepType; 47 using VarianceType = intelligibility::VarianceArray::StepType;
48 48
49 IntelligibilityEnhancer::TransformCallback::TransformCallback( 49 IntelligibilityEnhancer::TransformCallback::TransformCallback(
50 IntelligibilityEnhancer* parent, 50 IntelligibilityEnhancer* parent,
51 IntelligibilityEnhancer::AudioSource source) 51 IntelligibilityEnhancer::AudioSource source)
52 : parent_(parent), source_(source) { 52 : parent_(parent), source_(source) {
53 } 53 }
54 54
55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( 55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
56 const complex<float>* const* in_block, 56 const complex<float>* const* in_block,
57 int in_channels, 57 int in_channels,
58 int frames, 58 size_t frames,
59 int /* out_channels */, 59 int /* out_channels */,
60 complex<float>* const* out_block) { 60 complex<float>* const* out_block) {
61 DCHECK_EQ(parent_->freqs_, frames); 61 DCHECK_EQ(parent_->freqs_, frames);
62 for (int i = 0; i < in_channels; ++i) { 62 for (int i = 0; i < in_channels; ++i) {
63 parent_->DispatchAudio(source_, in_block[i], out_block[i]); 63 parent_->DispatchAudio(source_, in_block[i], out_block[i]);
64 } 64 }
65 } 65 }
66 66
67 IntelligibilityEnhancer::IntelligibilityEnhancer() 67 IntelligibilityEnhancer::IntelligibilityEnhancer()
68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { 68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
69 } 69 }
70 70
71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) 71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
72 : freqs_(RealFourier::ComplexLength( 72 : freqs_(RealFourier::ComplexLength(
73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), 73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
74 window_size_(1 << RealFourier::FftOrder(freqs_)), 74 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
75 chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000), 75 chunk_length_(
76 static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
76 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), 77 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
77 sample_rate_hz_(config.sample_rate_hz), 78 sample_rate_hz_(config.sample_rate_hz),
78 erb_resolution_(kErbResolution), 79 erb_resolution_(kErbResolution),
79 num_capture_channels_(config.num_capture_channels), 80 num_capture_channels_(config.num_capture_channels),
80 num_render_channels_(config.num_render_channels), 81 num_render_channels_(config.num_render_channels),
81 analysis_rate_(config.analysis_rate), 82 analysis_rate_(config.analysis_rate),
82 active_(true), 83 active_(true),
83 clear_variance_(freqs_, 84 clear_variance_(freqs_,
84 config.var_type, 85 config.var_type,
85 config.var_window_size, 86 config.var_window_size,
(...skipping 14 matching lines...) Expand all
100 kbd_window_(new float[window_size_]), 101 kbd_window_(new float[window_size_]),
101 render_callback_(this, AudioSource::kRenderStream), 102 render_callback_(this, AudioSource::kRenderStream),
102 capture_callback_(this, AudioSource::kCaptureStream), 103 capture_callback_(this, AudioSource::kCaptureStream),
103 block_count_(0), 104 block_count_(0),
104 analysis_step_(0) { 105 analysis_step_(0) {
105 DCHECK_LE(config.rho, 1.0f); 106 DCHECK_LE(config.rho, 1.0f);
106 107
107 CreateErbBank(); 108 CreateErbBank();
108 109
109 // Assumes all rho equal. 110 // Assumes all rho equal.
110 for (int i = 0; i < bank_size_; ++i) { 111 for (size_t i = 0; i < bank_size_; ++i) {
111 rho_[i] = config.rho * config.rho; 112 rho_[i] = config.rho * config.rho;
112 } 113 }
113 114
114 float freqs_khz = kClipFreq / 1000.0f; 115 float freqs_khz = kClipFreq / 1000.0f;
115 int erb_index = static_cast<int>(ceilf( 116 size_t erb_index = static_cast<size_t>(ceilf(
116 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); 117 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
117 start_freq_ = max(1, erb_index * erb_resolution_); 118 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
118 119
119 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, 120 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
120 kbd_window_.get()); 121 kbd_window_.get());
121 render_mangler_.reset(new LappedTransform( 122 render_mangler_.reset(new LappedTransform(
122 num_render_channels_, num_render_channels_, chunk_length_, 123 num_render_channels_, num_render_channels_, chunk_length_,
123 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); 124 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
124 capture_mangler_.reset(new LappedTransform( 125 capture_mangler_.reset(new LappedTransform(
125 num_capture_channels_, num_capture_channels_, chunk_length_, 126 num_capture_channels_, num_capture_channels_, chunk_length_,
126 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); 127 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
127 } 128 }
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
231 lambda_top = lambda; 232 lambda_top = lambda;
232 } 233 }
233 power_ratio = std::fabs(power * reciprocal_power_target); 234 power_ratio = std::fabs(power * reciprocal_power_target);
234 ++iters; 235 ++iters;
235 } 236 }
236 } 237 }
237 238
238 void IntelligibilityEnhancer::UpdateErbGains() { 239 void IntelligibilityEnhancer::UpdateErbGains() {
239 // (freq gain) = filterbank' * (ERB gain) 240 // (freq gain) = filterbank' * (ERB gain)
240 float* gains = gain_applier_.target(); 241 float* gains = gain_applier_.target();
241 for (int i = 0; i < freqs_; ++i) { 242 for (size_t i = 0; i < freqs_; ++i) {
242 gains[i] = 0.0f; 243 gains[i] = 0.0f;
243 for (int j = 0; j < bank_size_; ++j) { 244 for (size_t j = 0; j < bank_size_; ++j) {
244 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); 245 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
245 } 246 }
246 } 247 }
247 } 248 }
248 249
249 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, 250 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
250 complex<float>* /*out_block*/) { 251 complex<float>* /*out_block*/) {
251 noise_variance_.Step(in_block); 252 noise_variance_.Step(in_block);
252 } 253 }
253 254
254 int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) { 255 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
256 size_t erb_resolution) {
255 float freq_limit = sample_rate / 2000.0f; 257 float freq_limit = sample_rate / 2000.0f;
256 int erb_scale = ceilf( 258 size_t erb_scale = static_cast<size_t>(ceilf(
257 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f); 259 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
258 return erb_scale * erb_resolution; 260 return erb_scale * erb_resolution;
259 } 261 }
260 262
261 void IntelligibilityEnhancer::CreateErbBank() { 263 void IntelligibilityEnhancer::CreateErbBank() {
262 int lf = 1, rf = 4; 264 size_t lf = 1, rf = 4;
263 265
264 for (int i = 0; i < bank_size_; ++i) { 266 for (size_t i = 0; i < bank_size_; ++i) {
265 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); 267 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
266 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); 268 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
267 center_freqs_[i] -= 14678.49f; 269 center_freqs_[i] -= 14678.49f;
268 } 270 }
269 float last_center_freq = center_freqs_[bank_size_ - 1]; 271 float last_center_freq = center_freqs_[bank_size_ - 1];
270 for (int i = 0; i < bank_size_; ++i) { 272 for (size_t i = 0; i < bank_size_; ++i) {
271 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; 273 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
272 } 274 }
273 275
274 for (int i = 0; i < bank_size_; ++i) { 276 for (size_t i = 0; i < bank_size_; ++i) {
275 filter_bank_[i].resize(freqs_); 277 filter_bank_[i].resize(freqs_);
276 } 278 }
277 279
278 for (int i = 1; i <= bank_size_; ++i) { 280 for (size_t i = 1; i <= bank_size_; ++i) {
279 int lll, ll, rr, rrr; 281 size_t lll, ll, rr, rrr;
280 lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ / 282 static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
281 (0.5f * sample_rate_hz_)); 283 lll = static_cast<size_t>(round(
282 ll = 284 center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
283 round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)); 285 (0.5f * sample_rate_hz_)));
284 lll = min(freqs_, max(lll, 1)) - 1; 286 ll = static_cast<size_t>(round(
285 ll = min(freqs_, max(ll, 1)) - 1; 287 center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
288 lll = min(freqs_, max(lll, kOne)) - 1;
289 ll = min(freqs_, max(ll, kOne)) - 1;
286 290
287 rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / 291 rrr = static_cast<size_t>(round(
288 (0.5f * sample_rate_hz_)); 292 center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
289 rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / 293 (0.5f * sample_rate_hz_)));
290 (0.5f * sample_rate_hz_)); 294 rr = static_cast<size_t>(round(
291 rrr = min(freqs_, max(rrr, 1)) - 1; 295 center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
292 rr = min(freqs_, max(rr, 1)) - 1; 296 (0.5f * sample_rate_hz_)));
297 rrr = min(freqs_, max(rrr, kOne)) - 1;
298 rr = min(freqs_, max(rr, kOne)) - 1;
293 299
294 float step, element; 300 float step, element;
295 301
296 step = 1.0f / (ll - lll); 302 step = 1.0f / (ll - lll);
297 element = 0.0f; 303 element = 0.0f;
298 for (int j = lll; j <= ll; ++j) { 304 for (size_t j = lll; j <= ll; ++j) {
299 filter_bank_[i - 1][j] = element; 305 filter_bank_[i - 1][j] = element;
300 element += step; 306 element += step;
301 } 307 }
302 step = 1.0f / (rrr - rr); 308 step = 1.0f / (rrr - rr);
303 element = 1.0f; 309 element = 1.0f;
304 for (int j = rr; j <= rrr; ++j) { 310 for (size_t j = rr; j <= rrr; ++j) {
305 filter_bank_[i - 1][j] = element; 311 filter_bank_[i - 1][j] = element;
306 element -= step; 312 element -= step;
307 } 313 }
308 for (int j = ll; j <= rr; ++j) { 314 for (size_t j = ll; j <= rr; ++j) {
309 filter_bank_[i - 1][j] = 1.0f; 315 filter_bank_[i - 1][j] = 1.0f;
310 } 316 }
311 } 317 }
312 318
313 float sum; 319 float sum;
314 for (int i = 0; i < freqs_; ++i) { 320 for (size_t i = 0; i < freqs_; ++i) {
315 sum = 0.0f; 321 sum = 0.0f;
316 for (int j = 0; j < bank_size_; ++j) { 322 for (size_t j = 0; j < bank_size_; ++j) {
317 sum += filter_bank_[j][i]; 323 sum += filter_bank_[j][i];
318 } 324 }
319 for (int j = 0; j < bank_size_; ++j) { 325 for (size_t j = 0; j < bank_size_; ++j) {
320 filter_bank_[j][i] /= sum; 326 filter_bank_[j][i] /= sum;
321 } 327 }
322 } 328 }
323 } 329 }
324 330
325 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 331 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
326 int start_freq, 332 size_t start_freq,
327 float* sols) { 333 float* sols) {
328 bool quadratic = (kConfigRho < 1.0f); 334 bool quadratic = (kConfigRho < 1.0f);
329 const float* var_x0 = filtered_clear_var_.get(); 335 const float* var_x0 = filtered_clear_var_.get();
330 const float* var_n0 = filtered_noise_var_.get(); 336 const float* var_n0 = filtered_noise_var_.get();
331 337
332 for (int n = 0; n < start_freq; ++n) { 338 for (size_t n = 0; n < start_freq; ++n) {
333 sols[n] = 1.0f; 339 sols[n] = 1.0f;
334 } 340 }
335 341
336 // Analytic solution for optimal gains. See paper for derivation. 342 // Analytic solution for optimal gains. See paper for derivation.
337 for (int n = start_freq - 1; n < bank_size_; ++n) { 343 for (size_t n = start_freq - 1; n < bank_size_; ++n) {
338 float alpha0, beta0, gamma0; 344 float alpha0, beta0, gamma0;
339 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + 345 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
340 lambda * var_x0[n] * var_n0[n] * var_n0[n]; 346 lambda * var_x0[n] * var_n0[n] * var_n0[n];
341 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n]; 347 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];
342 if (quadratic) { 348 if (quadratic) {
343 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; 349 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];
344 sols[n] = 350 sols[n] =
345 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); 351 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);
346 } else { 352 } else {
347 sols[n] = -gamma0 / beta0; 353 sols[n] = -gamma0 / beta0;
348 } 354 }
349 sols[n] = fmax(0, sols[n]); 355 sols[n] = fmax(0, sols[n]);
350 } 356 }
351 } 357 }
352 358
353 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { 359 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
354 DCHECK_GT(freqs_, 0); 360 DCHECK_GT(freqs_, 0u);
355 for (int i = 0; i < bank_size_; ++i) { 361 for (size_t i = 0; i < bank_size_; ++i) {
356 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); 362 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
357 } 363 }
358 } 364 }
359 365
360 float IntelligibilityEnhancer::DotProduct(const float* a, 366 float IntelligibilityEnhancer::DotProduct(const float* a,
361 const float* b, 367 const float* b,
362 int length) { 368 size_t length) {
363 float ret = 0.0f; 369 float ret = 0.0f;
364 370
365 for (int i = 0; i < length; ++i) { 371 for (size_t i = 0; i < length; ++i) {
366 ret = fmaf(a[i], b[i], ret); 372 ret = fmaf(a[i], b[i], ret);
367 } 373 }
368 return ret; 374 return ret;
369 } 375 }
370 376
371 bool IntelligibilityEnhancer::active() const { 377 bool IntelligibilityEnhancer::active() const {
372 return active_; 378 return active_;
373 } 379 }
374 380
375 } // namespace webrtc 381 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698