Chromium Code Reviews | Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
| index dbb7e638b2d063c52e876ec53df090893e2d4d4c..98b9bc7dfa2bfc45dbf4fe6b06baebd503490a12 100644 |
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
| @@ -55,7 +55,7 @@ IntelligibilityEnhancer::TransformCallback::TransformCallback( |
| void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
| const complex<float>* const* in_block, |
| int in_channels, |
| - int frames, |
| + size_t frames, |
| int /* out_channels */, |
| complex<float>* const* out_block) { |
| DCHECK_EQ(parent_->freqs_, frames); |
| @@ -64,19 +64,19 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
| } |
| } |
| -IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution, |
| +IntelligibilityEnhancer::IntelligibilityEnhancer(size_t erb_resolution, |
| int sample_rate_hz, |
| int channels, |
| int cv_type, |
| float cv_alpha, |
| - int cv_win, |
| + size_t cv_win, |
| int analysis_rate, |
| int variance_rate, |
| float gain_limit) |
| : freqs_(RealFourier::ComplexLength( |
| RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
| - window_size_(1 << RealFourier::FftOrder(freqs_)), |
| - chunk_length_(sample_rate_hz * kChunkSizeMs / 1000), |
| + window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))), |
| + chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), |
| bank_size_(GetBankSize(sample_rate_hz, erb_resolution)), |
| sample_rate_hz_(sample_rate_hz), |
| erb_resolution_(erb_resolution), |
| @@ -124,14 +124,14 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution, |
| } |
| // Assumes all rho equal. |
| - for (int i = 0; i < bank_size_; ++i) { |
| + for (size_t i = 0; i < bank_size_; ++i) { |
| rho_[i] = kConfigRho * kConfigRho; |
| } |
| float freqs_khz = kClipFreq / 1000.0f; |
| - int erb_index = static_cast<int>(ceilf( |
| + size_t erb_index = static_cast<size_t>(ceilf( |
| 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); |
| - start_freq_ = std::max(1, erb_index * erb_resolution); |
| + start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution); |
| WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, |
| kbd_window_.get()); |
| @@ -150,7 +150,7 @@ IntelligibilityEnhancer::~IntelligibilityEnhancer() { |
| } |
| void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) { |
| - for (int i = 0; i < chunk_length_; ++i) { |
| + for (size_t i = 0; i < chunk_length_; ++i) { |
| vad_tmp_buffer_[i] = (int16_t)audio[0][i]; |
| } |
| has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_, |
| @@ -166,7 +166,7 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) { |
| } |
| void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) { |
| - for (int i = 0; i < chunk_length_; ++i) { |
| + for (size_t i = 0; i < chunk_length_; ++i) { |
| vad_tmp_buffer_[i] = (int16_t)audio[0][i]; |
| } |
| // TODO(bercic): The VAD was always detecting voice in the noise stream, |
| @@ -276,9 +276,9 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target, |
| void IntelligibilityEnhancer::UpdateErbGains() { |
| // (ERB gain) = filterbank' * (freq gain) |
| float* gains = gain_applier_.target(); |
| - for (int i = 0; i < freqs_; ++i) { |
| + for (size_t i = 0; i < freqs_; ++i) { |
| gains[i] = 0.0f; |
| - for (int j = 0; j < bank_size_; ++j) { |
| + for (size_t j = 0; j < bank_size_; ++j) { |
| gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); |
| } |
| } |
| @@ -289,90 +289,95 @@ void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, |
| noise_variance_.Step(in_block); |
| } |
| -int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) { |
| +size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
| + size_t erb_resolution) { |
| float freq_limit = sample_rate / 2000.0f; |
| - int erb_scale = ceilf( |
| - 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f); |
| + size_t erb_scale = static_cast<size_t>(ceilf( |
| + 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); |
| return erb_scale * erb_resolution; |
| } |
| void IntelligibilityEnhancer::CreateErbBank() { |
| - int lf = 1, rf = 4; |
| + size_t lf = 1, rf = 4; |
|
Andrew MacDonald
2015/07/24 04:01:43
Sorry to make you do this, but could you replace l…
ekm
2015/07/24 06:29:16
Sorry about this. This function is a direct port o…
Peter Kasting
2015/07/24 06:44:22
Based on this, I won't touch these.
|
| - for (int i = 0; i < bank_size_; ++i) { |
| + for (size_t i = 0; i < bank_size_; ++i) { |
| float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); |
| center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); |
|
Andrew MacDonald
2015/07/24 04:01:44
Gah, what are these magic numbers? Elliot, please…
ekm
2015/07/24 06:29:16
This is the magic formula for inverse ERBS from ps…
|
| center_freqs_[i] -= 14678.49f; |
| } |
| float last_center_freq = center_freqs_[bank_size_ - 1]; |
| - for (int i = 0; i < bank_size_; ++i) { |
| + for (size_t i = 0; i < bank_size_; ++i) { |
| center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; |
| } |
| - for (int i = 0; i < bank_size_; ++i) { |
| + for (size_t i = 0; i < bank_size_; ++i) { |
| filter_bank_[i].resize(freqs_); |
| } |
| - for (int i = 1; i <= bank_size_; ++i) { |
| - int lll, ll, rr, rrr; |
| - lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ / |
| - (0.5f * sample_rate_hz_)); |
| - ll = |
| - round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)); |
| - lll = min(freqs_, max(lll, 1)) - 1; |
| - ll = min(freqs_, max(ll, 1)) - 1; |
| - |
| - rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / |
| - (0.5f * sample_rate_hz_)); |
| - rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / |
| - (0.5f * sample_rate_hz_)); |
| - rrr = min(freqs_, max(rrr, 1)) - 1; |
| - rr = min(freqs_, max(rr, 1)) - 1; |
| + for (size_t i = 1; i <= bank_size_; ++i) { |
| + size_t lll, ll, rr, rrr; |
|
Andrew MacDonald
2015/07/24 04:01:44
This is horrific. Elliot, could you look into usin…
ekm
2015/07/24 06:29:16
Agreed; will do. They correspond from left-to-righ…
|
| + static const size_t kOne = 1; // Avoids repeated static_cast<>s below. |
| + lll = static_cast<size_t>(round( |
| + center_freqs_[max(one, i - lf) - 1] * freqs_ / |
|
Andrew MacDonald
2015/07/24 04:01:44
kOne, but why not just use a literal? "1u"
Peter Kasting
2015/07/24 06:44:22
1u causes compile failures :( (I tried that first…
Peter Kasting
2015/07/27 23:09:54
Correctly copied over the kOne usage from the full…
|
| + (0.5f * sample_rate_hz_))); |
| + ll = static_cast<size_t>(round( |
| + center_freqs_[max(one, i) - 1] * freqs_ / (0.5f * sample_rate_hz_))); |
| + lll = min(freqs_, max(lll, one)) - 1; |
| + ll = min(freqs_, max(ll, one)) - 1; |
| + |
| + rrr = static_cast<size_t>(round( |
| + center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / |
| + (0.5f * sample_rate_hz_))); |
| + rr = static_cast<size_t>(round( |
| + center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / |
| + (0.5f * sample_rate_hz_))); |
| + rrr = min(freqs_, max(rrr, one)) - 1; |
| + rr = min(freqs_, max(rr, one)) - 1; |
| float step, element; |
| step = 1.0f / (ll - lll); |
| element = 0.0f; |
| - for (int j = lll; j <= ll; ++j) { |
| + for (size_t j = lll; j <= ll; ++j) { |
| filter_bank_[i - 1][j] = element; |
| element += step; |
| } |
| step = 1.0f / (rrr - rr); |
| element = 1.0f; |
| - for (int j = rr; j <= rrr; ++j) { |
| + for (size_t j = rr; j <= rrr; ++j) { |
| filter_bank_[i - 1][j] = element; |
| element -= step; |
| } |
| - for (int j = ll; j <= rr; ++j) { |
| + for (size_t j = ll; j <= rr; ++j) { |
| filter_bank_[i - 1][j] = 1.0f; |
| } |
| } |
| float sum; |
| - for (int i = 0; i < freqs_; ++i) { |
| + for (size_t i = 0; i < freqs_; ++i) { |
| sum = 0.0f; |
| - for (int j = 0; j < bank_size_; ++j) { |
| + for (size_t j = 0; j < bank_size_; ++j) { |
| sum += filter_bank_[j][i]; |
| } |
| - for (int j = 0; j < bank_size_; ++j) { |
| + for (size_t j = 0; j < bank_size_; ++j) { |
| filter_bank_[j][i] /= sum; |
| } |
| } |
| } |
| void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
| - int start_freq, |
| + size_t start_freq, |
| float* sols) { |
| bool quadratic = (kConfigRho < 1.0f); |
| const float* var_x0 = filtered_clear_var_.get(); |
| const float* var_n0 = filtered_noise_var_.get(); |
| - for (int n = 0; n < start_freq; ++n) { |
| + for (size_t n = 0; n < start_freq; ++n) { |
| sols[n] = 1.0f; |
| } |
| // Analytic solution for optimal gains. See paper for derivation. |
| - for (int n = start_freq - 1; n < bank_size_; ++n) { |
| + for (size_t n = start_freq - 1; n < bank_size_; ++n) { |
| float alpha0, beta0, gamma0; |
| gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + |
| lambda * var_x0[n] * var_n0[n] * var_n0[n]; |
| @@ -389,18 +394,18 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
| } |
| void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { |
| - DCHECK_GT(freqs_, 0); |
| - for (int i = 0; i < bank_size_; ++i) { |
| + DCHECK_GT(freqs_, 0u); |
| + for (size_t i = 0; i < bank_size_; ++i) { |
| result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); |
| } |
| } |
| float IntelligibilityEnhancer::DotProduct(const float* a, |
| const float* b, |
| - int length) { |
| + size_t length) { |
| float ret = 0.0f; |
| - for (int i = 0; i < length; ++i) { |
| + for (size_t i = 0; i < length; ++i) { |
| ret = fmaf(a[i], b[i], ret); |
| } |
| return ret; |