| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| index 8eccde452c359cd49c0a29096a1cfc9628b261a7..33ff5cda8712f3bdce60a45205f120447bb534ce 100644
|
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| @@ -30,7 +30,7 @@ namespace webrtc {
|
|
|
| namespace {
|
|
|
| -const int kErbResolution = 2;
|
| +const size_t kErbResolution = 2;
|
| const int kWindowSizeMs = 2;
|
| const int kChunkSizeMs = 10; // Size provided by APM.
|
| const float kClipFreq = 200.0f;
|
| @@ -55,7 +55,7 @@ IntelligibilityEnhancer::TransformCallback::TransformCallback(
|
| void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
|
| const complex<float>* const* in_block,
|
| int in_channels,
|
| - int frames,
|
| + size_t frames,
|
| int /* out_channels */,
|
| complex<float>* const* out_block) {
|
| DCHECK_EQ(parent_->freqs_, frames);
|
| @@ -71,8 +71,9 @@ IntelligibilityEnhancer::IntelligibilityEnhancer()
|
| IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
|
| : freqs_(RealFourier::ComplexLength(
|
| RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
|
| - window_size_(1 << RealFourier::FftOrder(freqs_)),
|
| - chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000),
|
| + window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
|
| + chunk_length_(
|
| + static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
|
| bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
|
| sample_rate_hz_(config.sample_rate_hz),
|
| erb_resolution_(kErbResolution),
|
| @@ -107,14 +108,14 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
|
| CreateErbBank();
|
|
|
| // Assumes all rho equal.
|
| - for (int i = 0; i < bank_size_; ++i) {
|
| + for (size_t i = 0; i < bank_size_; ++i) {
|
| rho_[i] = config.rho * config.rho;
|
| }
|
|
|
| float freqs_khz = kClipFreq / 1000.0f;
|
| - int erb_index = static_cast<int>(ceilf(
|
| + size_t erb_index = static_cast<size_t>(ceilf(
|
| 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
|
| - start_freq_ = max(1, erb_index * erb_resolution_);
|
| + start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
|
|
|
| WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
|
| kbd_window_.get());
|
| @@ -238,9 +239,9 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target,
|
| void IntelligibilityEnhancer::UpdateErbGains() {
|
| // (ERB gain) = filterbank' * (freq gain)
|
| float* gains = gain_applier_.target();
|
| - for (int i = 0; i < freqs_; ++i) {
|
| + for (size_t i = 0; i < freqs_; ++i) {
|
| gains[i] = 0.0f;
|
| - for (int j = 0; j < bank_size_; ++j) {
|
| + for (size_t j = 0; j < bank_size_; ++j) {
|
| gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
|
| }
|
| }
|
| @@ -251,90 +252,95 @@ void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
|
| noise_variance_.Step(in_block);
|
| }
|
|
|
| -int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) {
|
| +size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
|
| + size_t erb_resolution) {
|
| float freq_limit = sample_rate / 2000.0f;
|
| - int erb_scale = ceilf(
|
| - 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f);
|
| + size_t erb_scale = static_cast<size_t>(ceilf(
|
| + 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
|
| return erb_scale * erb_resolution;
|
| }
|
|
|
| void IntelligibilityEnhancer::CreateErbBank() {
|
| - int lf = 1, rf = 4;
|
| + size_t lf = 1, rf = 4;
|
|
|
| - for (int i = 0; i < bank_size_; ++i) {
|
| + for (size_t i = 0; i < bank_size_; ++i) {
|
| float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
|
| center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
|
| center_freqs_[i] -= 14678.49f;
|
| }
|
| float last_center_freq = center_freqs_[bank_size_ - 1];
|
| - for (int i = 0; i < bank_size_; ++i) {
|
| + for (size_t i = 0; i < bank_size_; ++i) {
|
| center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
|
| }
|
|
|
| - for (int i = 0; i < bank_size_; ++i) {
|
| + for (size_t i = 0; i < bank_size_; ++i) {
|
| filter_bank_[i].resize(freqs_);
|
| }
|
|
|
| - for (int i = 1; i <= bank_size_; ++i) {
|
| - int lll, ll, rr, rrr;
|
| - lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ /
|
| - (0.5f * sample_rate_hz_));
|
| - ll =
|
| - round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_));
|
| - lll = min(freqs_, max(lll, 1)) - 1;
|
| - ll = min(freqs_, max(ll, 1)) - 1;
|
| -
|
| - rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
|
| - (0.5f * sample_rate_hz_));
|
| - rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
|
| - (0.5f * sample_rate_hz_));
|
| - rrr = min(freqs_, max(rrr, 1)) - 1;
|
| - rr = min(freqs_, max(rr, 1)) - 1;
|
| + for (size_t i = 1; i <= bank_size_; ++i) {
|
| + size_t lll, ll, rr, rrr;
|
| + static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
|
| + lll = static_cast<size_t>(round(
|
| + center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
|
| + (0.5f * sample_rate_hz_)));
|
| + ll = static_cast<size_t>(round(
|
| + center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
|
| + lll = min(freqs_, max(lll, kOne)) - 1;
|
| + ll = min(freqs_, max(ll, kOne)) - 1;
|
| +
|
| + rrr = static_cast<size_t>(round(
|
| + center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
|
| + (0.5f * sample_rate_hz_)));
|
| + rr = static_cast<size_t>(round(
|
| + center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
|
| + (0.5f * sample_rate_hz_)));
|
| + rrr = min(freqs_, max(rrr, kOne)) - 1;
|
| + rr = min(freqs_, max(rr, kOne)) - 1;
|
|
|
| float step, element;
|
|
|
| step = 1.0f / (ll - lll);
|
| element = 0.0f;
|
| - for (int j = lll; j <= ll; ++j) {
|
| + for (size_t j = lll; j <= ll; ++j) {
|
| filter_bank_[i - 1][j] = element;
|
| element += step;
|
| }
|
| step = 1.0f / (rrr - rr);
|
| element = 1.0f;
|
| - for (int j = rr; j <= rrr; ++j) {
|
| + for (size_t j = rr; j <= rrr; ++j) {
|
| filter_bank_[i - 1][j] = element;
|
| element -= step;
|
| }
|
| - for (int j = ll; j <= rr; ++j) {
|
| + for (size_t j = ll; j <= rr; ++j) {
|
| filter_bank_[i - 1][j] = 1.0f;
|
| }
|
| }
|
|
|
| float sum;
|
| - for (int i = 0; i < freqs_; ++i) {
|
| + for (size_t i = 0; i < freqs_; ++i) {
|
| sum = 0.0f;
|
| - for (int j = 0; j < bank_size_; ++j) {
|
| + for (size_t j = 0; j < bank_size_; ++j) {
|
| sum += filter_bank_[j][i];
|
| }
|
| - for (int j = 0; j < bank_size_; ++j) {
|
| + for (size_t j = 0; j < bank_size_; ++j) {
|
| filter_bank_[j][i] /= sum;
|
| }
|
| }
|
| }
|
|
|
| void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
| - int start_freq,
|
| + size_t start_freq,
|
| float* sols) {
|
| bool quadratic = (kConfigRho < 1.0f);
|
| const float* var_x0 = filtered_clear_var_.get();
|
| const float* var_n0 = filtered_noise_var_.get();
|
|
|
| - for (int n = 0; n < start_freq; ++n) {
|
| + for (size_t n = 0; n < start_freq; ++n) {
|
| sols[n] = 1.0f;
|
| }
|
|
|
| // Analytic solution for optimal gains. See paper for derivation.
|
| - for (int n = start_freq - 1; n < bank_size_; ++n) {
|
| + for (size_t n = start_freq - 1; n < bank_size_; ++n) {
|
| float alpha0, beta0, gamma0;
|
| gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
|
| lambda * var_x0[n] * var_n0[n] * var_n0[n];
|
| @@ -351,18 +357,18 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
| }
|
|
|
| void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
|
| - DCHECK_GT(freqs_, 0);
|
| - for (int i = 0; i < bank_size_; ++i) {
|
| + DCHECK_GT(freqs_, 0u);
|
| + for (size_t i = 0; i < bank_size_; ++i) {
|
| result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
|
| }
|
| }
|
|
|
| float IntelligibilityEnhancer::DotProduct(const float* a,
|
| const float* b,
|
| - int length) {
|
| + size_t length) {
|
| float ret = 0.0f;
|
|
|
| - for (int i = 0; i < length; ++i) {
|
| + for (size_t i = 0; i < length; ++i) {
|
| ret = fmaf(a[i], b[i], ret);
|
| }
|
| return ret;
|
|
|