Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
index f3d023ec620212142d41458e9c04070ad3dd1aae..f9d1c3c2734918e2cc06099856b1547e46398479 100644 |
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
@@ -68,6 +68,7 @@ void MapToErbBands(const float* pow, |
IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
size_t num_render_channels, |
+ size_t num_bands, |
size_t num_noise_bins) |
: freqs_(RealFourier::ComplexLength( |
RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
@@ -110,14 +111,24 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
render_mangler_.reset(new LappedTransform( |
num_render_channels_, num_render_channels_, chunk_length_, |
kbd_window.data(), window_size, window_size / 2, this)); |
+  // One DelayBuffer per band above the lowest; none when num_bands <= 1. |
+  const size_t initial_delay = render_mangler_->initial_delay(); |
+  for (size_t i = 1u; i < num_bands; ++i) {  // `num_bands - 1` would wrap at 0. |
+    high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>( |
+        new intelligibility::DelayBuffer(initial_delay, num_render_channels_))); |
+  } |
} |
IntelligibilityEnhancer::~IntelligibilityEnhancer() { |
- // Don't rely on this log, since the destructor isn't called when the app/tab |
- // is killed. |
- LOG(LS_INFO) << "Intelligibility Enhancer was active for " |
- << static_cast<float>(num_active_chunks_) / num_chunks_ |
- << "% of the call."; |
+  // Best-effort usage log: killing the app/tab skips this destructor entirely. |
+  if (num_chunks_ > 0) { |
+    const auto active_percent = |
+        100.f * static_cast<float>(num_active_chunks_) / num_chunks_; |
+    LOG(LS_INFO) << "Intelligibility Enhancer was active for " << active_percent |
+                 << "% of the call."; |
+  } else { |
+    LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk."; |
+  } |
} |
void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
@@ -132,16 +143,15 @@ void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
}; |
} |
-void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
- int sample_rate_hz, |
- size_t num_channels) { |
- RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
- RTC_CHECK_EQ(num_render_channels_, num_channels); |
+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {  // Enhances the kBand0To8kHz band; other bands are only delayed. |
+ RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());  // Buffer must match the configured channel count. |
while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { |
noise_power_estimator_.Step(noise_estimation_buffer_.data()); |
} |
- is_speech_ = IsSpeech(audio[0]); |
- render_mangler_->ProcessChunk(audio, audio); |
+ float* const* low_band = audio->split_channels_f(kBand0To8kHz);  // Per-channel pointers for the kBand0To8kHz band. |
+ is_speech_ = IsSpeech(low_band[0]);  // Speech decision looks at the first channel only. |
+ render_mangler_->ProcessChunk(low_band, low_band);  // Same pointer is passed as source and destination. |
+ DelayHighBands(audio);  // Runs the remaining bands through high_bands_buffers_. |
} |
void IntelligibilityEnhancer::ProcessAudioBlock( |
@@ -369,4 +379,12 @@ bool IntelligibilityEnhancer::IsSpeech(const float* audio) { |
return chunks_since_voice_ < kSpeechOffsetDelay; |
} |
+void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { |
+  RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1u); |
+  for (size_t band = 1u; band <= high_bands_buffers_.size(); ++band) {  // Band 0 is not delayed here. |
+    const auto channels = audio->split_channels_f(static_cast<Band>(band)); |
+    high_bands_buffers_[band - 1u]->Delay(channels, chunk_length_); |
+  } |
+} |
+ |
} // namespace webrtc |