Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index f3d023ec620212142d41458e9c04070ad3dd1aae..82929ec308ec11da10167cc80c431e9a75631913 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -116,7 +116,7 @@ IntelligibilityEnhancer::~IntelligibilityEnhancer() {
   // Don't rely on this log, since the destructor isn't called when the app/tab
   // is killed.
   LOG(LS_INFO) << "Intelligibility Enhancer was active for "
-               << static_cast<float>(num_active_chunks_) / num_chunks_
+               << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_
peah-webrtc
2016/09/09 09:12:16
This will cause an exception if IE is destroyed ri
aluebs-webrtc
2016/09/10 00:47:55
Done.
|
<< "% of the call."; |
} |
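Note on the thread above: the reviewer's concern is that this expression misbehaves when the enhancer is destroyed before any chunk has been processed, since num_chunks_ is still zero. The follow-up patch set behind the "Done." is not part of this hunk; a minimal sketch of a guard along those lines, assuming the destructor otherwise stays as shown, would be:

IntelligibilityEnhancer::~IntelligibilityEnhancer() {
  // Hypothetical guard (not the actual follow-up change): only log the
  // activity percentage once at least one chunk has been processed, so the
  // ratio is never evaluated with num_chunks_ == 0.
  if (num_chunks_ > 0) {
    LOG(LS_INFO) << "Intelligibility Enhancer was active for "
                 << 100.f * static_cast<float>(num_active_chunks_) /
                        num_chunks_
                 << "% of the call.";
  }
}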
@@ -132,16 +132,20 @@ void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
   };
 }
 
-void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
-                                                 int sample_rate_hz,
-                                                 size_t num_channels) {
+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio,
+                                                 int sample_rate_hz) {
peah-webrtc
2016/09/09 09:12:16
Is it really necessary to pass the sample rate to
aluebs-webrtc
2016/09/10 00:47:55
AudioBuffer doesn't have a sample_rate() method. B
|
   RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
-  RTC_CHECK_EQ(num_render_channels_, num_channels);
+  RTC_CHECK_EQ(num_render_channels_, audio->num_channels());
peah-webrtc
2016/09/09 09:12:16
This looks like something that there should be a D
aluebs-webrtc
2016/09/10 00:47:55
Done.
|
   while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
     noise_power_estimator_.Step(noise_estimation_buffer_.data());
   }
-  is_speech_ = IsSpeech(audio[0]);
-  render_mangler_->ProcessChunk(audio, audio);
+  float* const* in_low_band = audio->split_channels_f(kBand0To8kHz);
+  float* const* out_low_band = is_active_ ? in_low_band : nullptr;
+  is_speech_ = IsSpeech(in_low_band[0]);
+  render_mangler_->ProcessChunk(in_low_band, out_low_band);
peah-webrtc
2016/09/09 09:12:16
With this approach, you only let the audio pass th
aluebs-webrtc
2016/09/10 00:47:55
Good catch. I am not sure why I wrongly assumed th
|
+  if (is_active_) {
peah-webrtc
2016/09/09 09:12:16
You cannot turn this on/off during the call as tha
aluebs-webrtc
2016/09/10 00:47:55
Same as above.
|
+    DelayHighBands(audio);
+  }
 }
 
 void IntelligibilityEnhancer::ProcessAudioBlock(
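Taking the review feedback on this hunk at face value (prefer a DCHECK for the channel-count invariant, and avoid making the render path depend on is_active_, which both truncated threads above appear to be about), one way the method could end up looking is sketched below. This is an illustration under those assumptions, not the actual later patch set:

void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio,
                                                 int sample_rate_hz) {
  RTC_DCHECK_EQ(sample_rate_hz_, sample_rate_hz);
  RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
  while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
    noise_power_estimator_.Step(noise_estimation_buffer_.data());
  }
  float* const* low_band = audio->split_channels_f(kBand0To8kHz);
  is_speech_ = IsSpeech(low_band[0]);
  // Always write the processed low band back and always delay the high
  // bands, so enabling or disabling the enhancement does not change the
  // render-path delay or desynchronize the bands during a call.
  render_mangler_->ProcessChunk(low_band, low_band);
  DelayHighBands(audio);
}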
@@ -154,8 +158,6 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
   if (is_speech_) {
     clear_power_estimator_.Step(in_block[0]);
   }
-  SnrBasedEffectActivation();
-  ++num_chunks_;
   if (is_active_) {
     ++num_active_chunks_;
     if (num_chunks_ % kGainUpdatePeriod == 0) {
@@ -179,6 +181,8 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
       }  // Else experiencing power underflow, so do nothing.
     }
   }
+  SnrBasedEffectActivation();
+  ++num_chunks_;
   for (size_t i = 0; i < in_channels; ++i) {
     gain_applier_.Apply(in_block[i], out_block[i]);
   }
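For reference, with the two hunks above applied this part of ProcessAudioBlock() reads as follows (the gain-update body between the hunks is unchanged and elided here). The SNR-based activation decision and the chunk counter now move to after the per-period gain update instead of before it:

  if (is_speech_) {
    clear_power_estimator_.Step(in_block[0]);
  }
  if (is_active_) {
    ++num_active_chunks_;
    if (num_chunks_ % kGainUpdatePeriod == 0) {
      // ... gain update, unchanged by this CL ...
    }
  }
  SnrBasedEffectActivation();
  ++num_chunks_;
  for (size_t i = 0; i < in_channels; ++i) {
    gain_applier_.Apply(in_block[i], out_block[i]);
  }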
@@ -369,4 +373,25 @@ bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
   return chunks_since_voice_ < kSpeechOffsetDelay;
 }
 
+void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
+  size_t num_bands = audio->num_bands();
+  if (num_bands != high_bands_buffers_.size() + 1u) {
peah-webrtc
2016/09/09 09:12:16
Please put the initialization of this in the const
aluebs-webrtc
2016/09/10 00:47:55
Good point. I thought it needed to be dynamic, but
|
+    high_bands_buffers_.clear();
+    const size_t initial_delay = render_mangler_->initial_delay();
+    for (size_t i = 0u; i < num_bands - 1; ++i) {
+      high_bands_buffers_.push_back(
+          std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer(
+              num_render_channels_, chunk_length_ + initial_delay)));
+      high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay);
+    }
+  }
+  for (size_t i = 0u; i < num_bands - 1; ++i) {
+    Band band = static_cast<Band>(i + 1);
+    high_bands_buffers_[i]->Write(audio->split_channels_const_f(band),
+                                  num_render_channels_, chunk_length_);
+    high_bands_buffers_[i]->Read(audio->split_channels_f(band),
+                                 num_render_channels_, chunk_length_);
+  }
+}
+
 }  // namespace webrtc
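Following the review suggestion above to move this initialization into the constructor rather than re-checking it lazily in DelayHighBands(), the buffer setup would be created once at construction time in a later patch set. A minimal sketch of that constructor-side setup, assuming a num_bands value derived from the sample rate is available there along with chunk_length_ and render_mangler_ (the exact later code is not shown in this CL):

  // Sketch: allocate one ring buffer per high band and move its read position
  // back by the lapped transform's initial delay, so reads lag writes by the
  // same number of samples that the transform adds to the low band.
  const size_t initial_delay = render_mangler_->initial_delay();
  for (size_t i = 0u; i < num_bands - 1; ++i) {
    high_bands_buffers_.push_back(
        std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer(
            num_render_channels_, chunk_length_ + initial_delay)));
    high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay);
  }

DelayHighBands() would then reduce to the Write/Read pair per high band shown in the hunk above.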