| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| index 38a7ea32cf5c92093229b983c4a51959ca117fa5..d8f95edcf667e088b8c376140cc08c52e37a8f28 100644
|
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
|
| @@ -54,29 +54,12 @@ void MapToErbBands(const float* pow,
|
| float* result) {
|
| for (size_t i = 0; i < filter_bank.size(); ++i) {
|
| RTC_DCHECK_GT(filter_bank[i].size(), 0u);
|
| - result[i] = DotProduct(&filter_bank[i][0], pow, filter_bank[i].size());
|
| + result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
|
| }
|
| }
|
|
|
| } // namespace
|
|
|
| -IntelligibilityEnhancer::TransformCallback::TransformCallback(
|
| - IntelligibilityEnhancer* parent)
|
| - : parent_(parent) {
|
| -}
|
| -
|
| -void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
|
| - const std::complex<float>* const* in_block,
|
| - size_t in_channels,
|
| - size_t frames,
|
| - size_t /* out_channels */,
|
| - std::complex<float>* const* out_block) {
|
| - RTC_DCHECK_EQ(parent_->freqs_, frames);
|
| - for (size_t i = 0; i < in_channels; ++i) {
|
| - parent_->ProcessClearBlock(in_block[i], out_block[i]);
|
| - }
|
| -}
|
| -
|
| IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
|
| size_t num_render_channels)
|
| : freqs_(RealFourier::ComplexLength(
|
| @@ -88,24 +71,17 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
|
| clear_power_estimator_(freqs_, kDecayRate),
|
| noise_power_estimator_(
|
| new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),
|
| - filtered_clear_pow_(new float[bank_size_]),
|
| - filtered_noise_pow_(new float[bank_size_]),
|
| - center_freqs_(new float[bank_size_]),
|
| + filtered_clear_pow_(bank_size_, 0.f),
|
| + filtered_noise_pow_(bank_size_, 0.f),
|
| + center_freqs_(bank_size_),
|
| render_filter_bank_(CreateErbBank(freqs_)),
|
| - gains_eq_(new float[bank_size_]),
|
| + gains_eq_(bank_size_),
|
| gain_applier_(freqs_, kMaxRelativeGainChange),
|
| - temp_render_out_buffer_(chunk_length_, num_render_channels_),
|
| - render_callback_(this),
|
| audio_s16_(chunk_length_),
|
| chunks_since_voice_(kSpeechOffsetDelay),
|
| is_speech_(false) {
|
| RTC_DCHECK_LE(kRho, 1.f);
|
|
|
| - memset(filtered_clear_pow_.get(), 0,
|
| - bank_size_ * sizeof(filtered_clear_pow_[0]));
|
| - memset(filtered_noise_pow_.get(), 0,
|
| - bank_size_ * sizeof(filtered_noise_pow_[0]));
|
| -
|
| const size_t erb_index = static_cast<size_t>(
|
| ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
|
| 43.f));
|
| @@ -113,10 +89,11 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
|
|
|
| size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));
|
| std::vector<float> kbd_window(window_size);
|
| - WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, &kbd_window[0]);
|
| + WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
|
| + kbd_window.data());
|
| render_mangler_.reset(new LappedTransform(
|
| - num_render_channels_, num_render_channels_, chunk_length_, &kbd_window[0],
|
| - window_size, window_size / 2, &render_callback_));
|
| + num_render_channels_, num_render_channels_, chunk_length_,
|
| + kbd_window.data(), window_size, window_size / 2, this));
|
| }
|
|
|
| void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
|
| @@ -127,7 +104,7 @@ void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
|
| noise_power_estimator_.reset(
|
| new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate));
|
| }
|
| - noise_power_estimator_->Step(&noise[0]);
|
| + noise_power_estimator_->Step(noise.data());
|
| }
|
|
|
| void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
|
| @@ -136,38 +113,40 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
|
| RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
|
| RTC_CHECK_EQ(num_render_channels_, num_channels);
|
| is_speech_ = IsSpeech(audio[0]);
|
| - render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
|
| - for (size_t i = 0; i < num_render_channels_; ++i) {
|
| - memcpy(audio[i], temp_render_out_buffer_.channels()[i],
|
| - chunk_length_ * sizeof(**audio));
|
| - }
|
| + render_mangler_->ProcessChunk(audio, audio);
|
| }
|
|
|
| -void IntelligibilityEnhancer::ProcessClearBlock(
|
| - const std::complex<float>* in_block,
|
| - std::complex<float>* out_block) {
|
| +void IntelligibilityEnhancer::ProcessAudioBlock(
|
| + const std::complex<float>* const* in_block,
|
| + size_t in_channels,
|
| + size_t frames,
|
| + size_t /* out_channels */,
|
| + std::complex<float>* const* out_block) {
|
| + RTC_DCHECK_EQ(freqs_, frames);
|
| if (is_speech_) {
|
| - clear_power_estimator_.Step(in_block);
|
| + clear_power_estimator_.Step(in_block[0]);
|
| }
|
| const std::vector<float>& clear_power = clear_power_estimator_.power();
|
| const std::vector<float>& noise_power = noise_power_estimator_->power();
|
| - MapToErbBands(&clear_power[0], render_filter_bank_,
|
| - filtered_clear_pow_.get());
|
| - MapToErbBands(&noise_power[0], capture_filter_bank_,
|
| - filtered_noise_pow_.get());
|
| - SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
|
| + MapToErbBands(clear_power.data(), render_filter_bank_,
|
| + filtered_clear_pow_.data());
|
| + MapToErbBands(noise_power.data(), capture_filter_bank_,
|
| + filtered_noise_pow_.data());
|
| + SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
|
| const float power_target =
|
| - std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);
|
| + std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f);
|
| const float power_top =
|
| - DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
|
| - SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
|
| + DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
| + SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
|
| const float power_bot =
|
| - DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
|
| + DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
| if (power_target >= power_bot && power_target <= power_top) {
|
| SolveForLambda(power_target);
|
| UpdateErbGains();
|
| } // Else experiencing power underflow, so do nothing.
|
| - gain_applier_.Apply(in_block, out_block);
|
| + for (size_t i = 0; i < in_channels; ++i) {
|
| + gain_applier_.Apply(in_block[i], out_block[i]);
|
| + }
|
| }
|
|
|
| void IntelligibilityEnhancer::SolveForLambda(float power_target) {
|
| @@ -182,9 +161,9 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target) {
|
| int iters = 0;
|
| while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
|
| const float lambda = (lambda_bot + lambda_top) / 2.f;
|
| - SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
|
| + SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
|
| const float power =
|
| - DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
|
| + DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
| if (power < power_target) {
|
| lambda_bot = lambda;
|
| } else {
|
| @@ -286,8 +265,8 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
| float* sols) {
|
| const float kMinPower = 1e-5f;
|
|
|
| - const float* pow_x0 = filtered_clear_pow_.get();
|
| - const float* pow_n0 = filtered_noise_pow_.get();
|
| + const float* pow_x0 = filtered_clear_pow_.data();
|
| + const float* pow_n0 = filtered_noise_pow_.data();
|
|
|
| for (size_t n = 0; n < start_freq; ++n) {
|
| sols[n] = 1.f;
|
| @@ -316,8 +295,8 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
| }
|
|
|
| bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
|
| - FloatToS16(audio, chunk_length_, &audio_s16_[0]);
|
| - vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);
|
| + FloatToS16(audio, chunk_length_, audio_s16_.data());
|
| + vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
|
| if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
|
| chunks_since_voice_ = 0;
|
| } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
|
|
|