webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)

Patch Set: Rebasing (created 4 years, 3 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« webrtc/common_audio/lapped_transform.h ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

index f3d023ec620212142d41458e9c04070ad3dd1aae..f9d1c3c2734918e2cc06099856b1547e46398479 100644

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

@@ -68,6 +68,7 @@ void MapToErbBands(const float* pow,

IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,

size_t num_render_channels,

+ size_t num_bands,

size_t num_noise_bins)

: freqs_(RealFourier::ComplexLength(

RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),

@@ -110,14 +111,24 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,

render_mangler_.reset(new LappedTransform(

num_render_channels_, num_render_channels_, chunk_length_,

kbd_window.data(), window_size, window_size / 2, this));

+ const size_t initial_delay = render_mangler_->initial_delay();

+ for (size_t i = 0u; i < num_bands - 1; ++i) {

+ high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(

+ new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));

+ }

}

IntelligibilityEnhancer::~IntelligibilityEnhancer() {

- // Don't rely on this log, since the destructor isn't called when the app/tab

- // is killed.

- LOG(LS_INFO) << "Intelligibility Enhancer was active for "

- << static_cast<float>(num_active_chunks_) / num_chunks_

- << "% of the call.";

+ // Don't rely on this log, since the destructor isn't called when the

+ // app/tab is killed.

+ if (num_chunks_ > 0) {

+ LOG(LS_INFO) << "Intelligibility Enhancer was active for "

+ << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_

+ << "% of the call.";

+ } else {

+ LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";

+ }

}

void IntelligibilityEnhancer::SetCaptureNoiseEstimate(

@@ -132,16 +143,15 @@ void IntelligibilityEnhancer::SetCaptureNoiseEstimate(

};

}

-void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,

- int sample_rate_hz,

- size_t num_channels) {

- RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);

- RTC_CHECK_EQ(num_render_channels_, num_channels);

+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {

+ RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());

while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {

noise_power_estimator_.Step(noise_estimation_buffer_.data());

}

- is_speech_ = IsSpeech(audio[0]);

- render_mangler_->ProcessChunk(audio, audio);

+ float* const* low_band = audio->split_channels_f(kBand0To8kHz);

+ is_speech_ = IsSpeech(low_band[0]);

+ render_mangler_->ProcessChunk(low_band, low_band);

+ DelayHighBands(audio);

}

void IntelligibilityEnhancer::ProcessAudioBlock(

@@ -369,4 +379,12 @@ bool IntelligibilityEnhancer::IsSpeech(const float* audio) {

return chunks_since_voice_ < kSpeechOffsetDelay;

}

+void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {

+ RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1u);

+ for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) {

+ Band band = static_cast<Band>(i + 1);

+ high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_);

+ }

+}

} // namespace webrtc