Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(35)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1227213002: Update audio code to use size_t more correctly, webrtc/modules/audio_processing/ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master
Patch Set: Resync Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 48
49 IntelligibilityEnhancer::TransformCallback::TransformCallback( 49 IntelligibilityEnhancer::TransformCallback::TransformCallback(
50 IntelligibilityEnhancer* parent, 50 IntelligibilityEnhancer* parent,
51 IntelligibilityEnhancer::AudioSource source) 51 IntelligibilityEnhancer::AudioSource source)
52 : parent_(parent), source_(source) { 52 : parent_(parent), source_(source) {
53 } 53 }
54 54
55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( 55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
56 const complex<float>* const* in_block, 56 const complex<float>* const* in_block,
57 int in_channels, 57 int in_channels,
58 int frames, 58 size_t frames,
59 int /* out_channels */, 59 int /* out_channels */,
60 complex<float>* const* out_block) { 60 complex<float>* const* out_block) {
61 DCHECK_EQ(parent_->freqs_, frames); 61 DCHECK_EQ(parent_->freqs_, frames);
62 for (int i = 0; i < in_channels; ++i) { 62 for (int i = 0; i < in_channels; ++i) {
63 parent_->DispatchAudio(source_, in_block[i], out_block[i]); 63 parent_->DispatchAudio(source_, in_block[i], out_block[i]);
64 } 64 }
65 } 65 }
66 66
67 IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution, 67 IntelligibilityEnhancer::IntelligibilityEnhancer(size_t erb_resolution,
68 int sample_rate_hz, 68 int sample_rate_hz,
69 int channels, 69 int channels,
70 int cv_type, 70 int cv_type,
71 float cv_alpha, 71 float cv_alpha,
72 int cv_win, 72 size_t cv_win,
73 int analysis_rate, 73 int analysis_rate,
74 int variance_rate, 74 int variance_rate,
75 float gain_limit) 75 float gain_limit)
76 : freqs_(RealFourier::ComplexLength( 76 : freqs_(RealFourier::ComplexLength(
77 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), 77 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
78 window_size_(1 << RealFourier::FftOrder(freqs_)), 78 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
79 chunk_length_(sample_rate_hz * kChunkSizeMs / 1000), 79 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
80 bank_size_(GetBankSize(sample_rate_hz, erb_resolution)), 80 bank_size_(GetBankSize(sample_rate_hz, erb_resolution)),
81 sample_rate_hz_(sample_rate_hz), 81 sample_rate_hz_(sample_rate_hz),
82 erb_resolution_(erb_resolution), 82 erb_resolution_(erb_resolution),
83 channels_(channels), 83 channels_(channels),
84 analysis_rate_(analysis_rate), 84 analysis_rate_(analysis_rate),
85 variance_rate_(variance_rate), 85 variance_rate_(variance_rate),
86 clear_variance_(freqs_, 86 clear_variance_(freqs_,
87 static_cast<VarianceType>(cv_type), 87 static_cast<VarianceType>(cv_type),
88 cv_win, 88 cv_win,
89 cv_alpha), 89 cv_alpha),
(...skipping 27 matching lines...) Expand all
117 temp_out_buffer_ = static_cast<float**>( 117 temp_out_buffer_ = static_cast<float**>(
118 malloc(sizeof(*temp_out_buffer_) * channels_ + 118 malloc(sizeof(*temp_out_buffer_) * channels_ +
119 sizeof(**temp_out_buffer_) * chunk_length_ * channels_)); 119 sizeof(**temp_out_buffer_) * chunk_length_ * channels_));
120 for (int i = 0; i < channels_; ++i) { 120 for (int i = 0; i < channels_; ++i) {
121 temp_out_buffer_[i] = 121 temp_out_buffer_[i] =
122 reinterpret_cast<float*>(temp_out_buffer_ + channels_) + 122 reinterpret_cast<float*>(temp_out_buffer_ + channels_) +
123 chunk_length_ * i; 123 chunk_length_ * i;
124 } 124 }
125 125
126 // Assumes all rho equal. 126 // Assumes all rho equal.
127 for (int i = 0; i < bank_size_; ++i) { 127 for (size_t i = 0; i < bank_size_; ++i) {
128 rho_[i] = kConfigRho * kConfigRho; 128 rho_[i] = kConfigRho * kConfigRho;
129 } 129 }
130 130
131 float freqs_khz = kClipFreq / 1000.0f; 131 float freqs_khz = kClipFreq / 1000.0f;
132 int erb_index = static_cast<int>(ceilf( 132 size_t erb_index = static_cast<size_t>(ceilf(
133 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); 133 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
134 start_freq_ = std::max(1, erb_index * erb_resolution); 134 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution);
135 135
136 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, 136 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
137 kbd_window_.get()); 137 kbd_window_.get());
138 render_mangler_.reset(new LappedTransform( 138 render_mangler_.reset(new LappedTransform(
139 channels_, channels_, chunk_length_, kbd_window_.get(), window_size_, 139 channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
140 window_size_ / 2, &render_callback_)); 140 window_size_ / 2, &render_callback_));
141 capture_mangler_.reset(new LappedTransform( 141 capture_mangler_.reset(new LappedTransform(
142 channels_, channels_, chunk_length_, kbd_window_.get(), window_size_, 142 channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
143 window_size_ / 2, &capture_callback_)); 143 window_size_ / 2, &capture_callback_));
144 } 144 }
145 145
146 IntelligibilityEnhancer::~IntelligibilityEnhancer() { 146 IntelligibilityEnhancer::~IntelligibilityEnhancer() {
147 WebRtcVad_Free(vad_low_); 147 WebRtcVad_Free(vad_low_);
148 WebRtcVad_Free(vad_high_); 148 WebRtcVad_Free(vad_high_);
149 free(temp_out_buffer_); 149 free(temp_out_buffer_);
150 } 150 }
151 151
152 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) { 152 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) {
153 for (int i = 0; i < chunk_length_; ++i) { 153 for (size_t i = 0; i < chunk_length_; ++i) {
154 vad_tmp_buffer_[i] = (int16_t)audio[0][i]; 154 vad_tmp_buffer_[i] = (int16_t)audio[0][i];
155 } 155 }
156 has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_, 156 has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_,
157 vad_tmp_buffer_.get(), chunk_length_) == 1; 157 vad_tmp_buffer_.get(), chunk_length_) == 1;
158 158
159 // Process and enhance chunk of |audio| 159 // Process and enhance chunk of |audio|
160 render_mangler_->ProcessChunk(audio, temp_out_buffer_); 160 render_mangler_->ProcessChunk(audio, temp_out_buffer_);
161 161
162 for (int i = 0; i < channels_; ++i) { 162 for (int i = 0; i < channels_; ++i) {
163 memcpy(audio[i], temp_out_buffer_[i], 163 memcpy(audio[i], temp_out_buffer_[i],
164 chunk_length_ * sizeof(**temp_out_buffer_)); 164 chunk_length_ * sizeof(**temp_out_buffer_));
165 } 165 }
166 } 166 }
167 167
168 void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) { 168 void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) {
169 for (int i = 0; i < chunk_length_; ++i) { 169 for (size_t i = 0; i < chunk_length_; ++i) {
170 vad_tmp_buffer_[i] = (int16_t)audio[0][i]; 170 vad_tmp_buffer_[i] = (int16_t)audio[0][i];
171 } 171 }
172 // TODO(bercic): The VAD was always detecting voice in the noise stream, 172 // TODO(bercic): The VAD was always detecting voice in the noise stream,
173 // no matter what the aggressiveness, so it was temporarily disabled here. 173 // no matter what the aggressiveness, so it was temporarily disabled here.
174 174
175 #if 0 175 #if 0
176 if (WebRtcVad_Process(vad_high_, sample_rate_hz_, vad_tmp_buffer_.get(), 176 if (WebRtcVad_Process(vad_high_, sample_rate_hz_, vad_tmp_buffer_.get(),
177 chunk_length_) == 1) { 177 chunk_length_) == 1) {
178 printf("capture HAS speech\n"); 178 printf("capture HAS speech\n");
179 return; 179 return;
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 lambda_top = lambda; 269 lambda_top = lambda;
270 } 270 }
271 power_ratio = std::fabs(power * reciprocal_power_target); 271 power_ratio = std::fabs(power * reciprocal_power_target);
272 ++iters; 272 ++iters;
273 } 273 }
274 } 274 }
275 275
276 void IntelligibilityEnhancer::UpdateErbGains() { 276 void IntelligibilityEnhancer::UpdateErbGains() {
277 // (ERB gain) = filterbank' * (freq gain) 277 // (ERB gain) = filterbank' * (freq gain)
278 float* gains = gain_applier_.target(); 278 float* gains = gain_applier_.target();
279 for (int i = 0; i < freqs_; ++i) { 279 for (size_t i = 0; i < freqs_; ++i) {
280 gains[i] = 0.0f; 280 gains[i] = 0.0f;
281 for (int j = 0; j < bank_size_; ++j) { 281 for (size_t j = 0; j < bank_size_; ++j) {
282 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); 282 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
283 } 283 }
284 } 284 }
285 } 285 }
286 286
287 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, 287 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
288 complex<float>* /*out_block*/) { 288 complex<float>* /*out_block*/) {
289 noise_variance_.Step(in_block); 289 noise_variance_.Step(in_block);
290 } 290 }
291 291
292 int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) { 292 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
293 size_t erb_resolution) {
293 float freq_limit = sample_rate / 2000.0f; 294 float freq_limit = sample_rate / 2000.0f;
294 int erb_scale = ceilf( 295 size_t erb_scale = static_cast<size_t>(ceilf(
295 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f); 296 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
296 return erb_scale * erb_resolution; 297 return erb_scale * erb_resolution;
297 } 298 }
298 299
299 void IntelligibilityEnhancer::CreateErbBank() { 300 void IntelligibilityEnhancer::CreateErbBank() {
300 int lf = 1, rf = 4; 301 size_t lf = 1, rf = 4;
301 302
302 for (int i = 0; i < bank_size_; ++i) { 303 for (size_t i = 0; i < bank_size_; ++i) {
303 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); 304 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
304 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); 305 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
305 center_freqs_[i] -= 14678.49f; 306 center_freqs_[i] -= 14678.49f;
306 } 307 }
307 float last_center_freq = center_freqs_[bank_size_ - 1]; 308 float last_center_freq = center_freqs_[bank_size_ - 1];
308 for (int i = 0; i < bank_size_; ++i) { 309 for (size_t i = 0; i < bank_size_; ++i) {
309 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; 310 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
310 } 311 }
311 312
312 for (int i = 0; i < bank_size_; ++i) { 313 for (size_t i = 0; i < bank_size_; ++i) {
313 filter_bank_[i].resize(freqs_); 314 filter_bank_[i].resize(freqs_);
314 } 315 }
315 316
316 for (int i = 1; i <= bank_size_; ++i) { 317 for (size_t i = 1; i <= bank_size_; ++i) {
317 int lll, ll, rr, rrr; 318 size_t lll, ll, rr, rrr;
318 lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ / 319 static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
319 (0.5f * sample_rate_hz_)); 320 lll = static_cast<size_t>(round(
320 ll = 321 center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
321 round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)); 322 (0.5f * sample_rate_hz_)));
322 lll = min(freqs_, max(lll, 1)) - 1; 323 ll = static_cast<size_t>(round(
323 ll = min(freqs_, max(ll, 1)) - 1; 324 center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
325 lll = min(freqs_, max(lll, kOne)) - 1;
326 ll = min(freqs_, max(ll, kOne)) - 1;
324 327
325 rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / 328 rrr = static_cast<size_t>(round(
326 (0.5f * sample_rate_hz_)); 329 center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
327 rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / 330 (0.5f * sample_rate_hz_)));
328 (0.5f * sample_rate_hz_)); 331 rr = static_cast<size_t>(round(
329 rrr = min(freqs_, max(rrr, 1)) - 1; 332 center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
330 rr = min(freqs_, max(rr, 1)) - 1; 333 (0.5f * sample_rate_hz_)));
334 rrr = min(freqs_, max(rrr, kOne)) - 1;
335 rr = min(freqs_, max(rr, kOne)) - 1;
331 336
332 float step, element; 337 float step, element;
333 338
334 step = 1.0f / (ll - lll); 339 step = 1.0f / (ll - lll);
335 element = 0.0f; 340 element = 0.0f;
336 for (int j = lll; j <= ll; ++j) { 341 for (size_t j = lll; j <= ll; ++j) {
337 filter_bank_[i - 1][j] = element; 342 filter_bank_[i - 1][j] = element;
338 element += step; 343 element += step;
339 } 344 }
340 step = 1.0f / (rrr - rr); 345 step = 1.0f / (rrr - rr);
341 element = 1.0f; 346 element = 1.0f;
342 for (int j = rr; j <= rrr; ++j) { 347 for (size_t j = rr; j <= rrr; ++j) {
343 filter_bank_[i - 1][j] = element; 348 filter_bank_[i - 1][j] = element;
344 element -= step; 349 element -= step;
345 } 350 }
346 for (int j = ll; j <= rr; ++j) { 351 for (size_t j = ll; j <= rr; ++j) {
347 filter_bank_[i - 1][j] = 1.0f; 352 filter_bank_[i - 1][j] = 1.0f;
348 } 353 }
349 } 354 }
350 355
351 float sum; 356 float sum;
352 for (int i = 0; i < freqs_; ++i) { 357 for (size_t i = 0; i < freqs_; ++i) {
353 sum = 0.0f; 358 sum = 0.0f;
354 for (int j = 0; j < bank_size_; ++j) { 359 for (size_t j = 0; j < bank_size_; ++j) {
355 sum += filter_bank_[j][i]; 360 sum += filter_bank_[j][i];
356 } 361 }
357 for (int j = 0; j < bank_size_; ++j) { 362 for (size_t j = 0; j < bank_size_; ++j) {
358 filter_bank_[j][i] /= sum; 363 filter_bank_[j][i] /= sum;
359 } 364 }
360 } 365 }
361 } 366 }
362 367
363 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, 368 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
364 int start_freq, 369 size_t start_freq,
365 float* sols) { 370 float* sols) {
366 bool quadratic = (kConfigRho < 1.0f); 371 bool quadratic = (kConfigRho < 1.0f);
367 const float* var_x0 = filtered_clear_var_.get(); 372 const float* var_x0 = filtered_clear_var_.get();
368 const float* var_n0 = filtered_noise_var_.get(); 373 const float* var_n0 = filtered_noise_var_.get();
369 374
370 for (int n = 0; n < start_freq; ++n) { 375 for (size_t n = 0; n < start_freq; ++n) {
371 sols[n] = 1.0f; 376 sols[n] = 1.0f;
372 } 377 }
373 378
374 // Analytic solution for optimal gains. See paper for derivation. 379 // Analytic solution for optimal gains. See paper for derivation.
375 for (int n = start_freq - 1; n < bank_size_; ++n) { 380 for (size_t n = start_freq - 1; n < bank_size_; ++n) {
376 float alpha0, beta0, gamma0; 381 float alpha0, beta0, gamma0;
377 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + 382 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
378 lambda * var_x0[n] * var_n0[n] * var_n0[n]; 383 lambda * var_x0[n] * var_n0[n] * var_n0[n];
379 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n]; 384 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];
380 if (quadratic) { 385 if (quadratic) {
381 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; 386 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];
382 sols[n] = 387 sols[n] =
383 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); 388 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);
384 } else { 389 } else {
385 sols[n] = -gamma0 / beta0; 390 sols[n] = -gamma0 / beta0;
386 } 391 }
387 sols[n] = fmax(0, sols[n]); 392 sols[n] = fmax(0, sols[n]);
388 } 393 }
389 } 394 }
390 395
391 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { 396 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
392 DCHECK_GT(freqs_, 0); 397 DCHECK_GT(freqs_, 0u);
393 for (int i = 0; i < bank_size_; ++i) { 398 for (size_t i = 0; i < bank_size_; ++i) {
394 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); 399 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
395 } 400 }
396 } 401 }
397 402
398 float IntelligibilityEnhancer::DotProduct(const float* a, 403 float IntelligibilityEnhancer::DotProduct(const float* a,
399 const float* b, 404 const float* b,
400 int length) { 405 size_t length) {
401 float ret = 0.0f; 406 float ret = 0.0f;
402 407
403 for (int i = 0; i < length; ++i) { 408 for (size_t i = 0; i < length; ++i) {
404 ret = fmaf(a[i], b[i], ret); 409 ret = fmaf(a[i], b[i], ret);
405 } 410 }
406 return ret; 411 return ret;
407 } 412 }
408 413
409 } // namespace webrtc 414 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698