webrtc/modules/audio_processing/audio_processing_impl.cc - Issue 1234463003: Integrate Intelligibility with APM

Unified Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Added resampling support to InterleaveTo; removed VAD logic Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/audio_buffer.cc ('K') | « webrtc/modules/audio_processing/audio_processing_impl.h ('k') | webrtc/modules/audio_processing/include/audio_processing.h » ('j') | webrtc/modules/audio_processing/include/audio_processing.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/audio_processing_impl.cc

diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc

index 87b82a6a3509131adae9ed698cc0f896fd01d4c0..1ee12d9aae60f8e872837bd971f70689b1bc4558 100644

--- a/webrtc/modules/audio_processing/audio_processing_impl.cc

+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc

@@ -28,6 +28,7 @@ extern "C" {

#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"

#include "webrtc/modules/audio_processing/gain_control_impl.h"

#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"

+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

#include "webrtc/modules/audio_processing/level_estimator_impl.h"

#include "webrtc/modules/audio_processing/noise_suppression_impl.h"

#include "webrtc/modules/audio_processing/processing_component.h"

@@ -195,7 +196,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,

beamformer_enabled_(config.Get<Beamforming>().enabled),

beamformer_(beamformer),

array_geometry_(config.Get<Beamforming>().array_geometry),

- supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled) {

+ supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled),

+ intelligibility_enabled_(config.Get<Intelligibility>().enabled) {

echo_cancellation_ = new EchoCancellationImpl(this, crit_);

component_list_.push_back(echo_cancellation_);

@@ -305,6 +307,8 @@ int AudioProcessingImpl::InitializeLocked() {

InitializeBeamformer();

+ InitializeIntelligibility();

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

if (debug_file_->Open()) {

int err = WriteInitMessage();

@@ -599,6 +603,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {

MaybeUpdateHistograms();

AudioBuffer* ca = capture_audio_.get(); // For brevity.

if (use_new_agc_ && gain_control_->is_enabled()) {

agc_manager_->AnalyzePreProcess(ca->channels()[0],

ca->num_channels(),

@@ -610,6 +615,11 @@ int AudioProcessingImpl::ProcessStreamLocked() {

ca->SplitIntoFrequencyBands();

}

+ if (intelligibility_enabled_) {

+ intelligibility_enhancer_->AnalyzeCaptureAudio(

+ ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());

+ }

if (beamformer_enabled_) {

beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());

ca->set_num_channels(1);

@@ -664,9 +674,22 @@ int AudioProcessingImpl::ProcessStreamLocked() {

return kNoError;

}

+int AudioProcessingImpl::ProcessReverseStream(float* const* data,

+ int samples_per_channel,

+ int rev_sample_rate_hz,

+ ChannelLayout layout) {

+ RETURN_ON_ERR(AnalyzeReverseStream(data, samples_per_channel,

+ rev_sample_rate_hz, layout));

+ if (is_rev_processed()) {

+ render_audio_->CopyTo(samples_per_channel, layout, data);

+ }

+ return kNoError;

+}

int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,

int samples_per_channel,

- int sample_rate_hz,

+ int rev_sample_rate_hz,

ChannelLayout layout) {

CriticalSectionScoped crit_scoped(crit_);

if (data == NULL) {

@@ -674,12 +697,10 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,

}

const int num_channels = ChannelsFromLayout(layout);

- RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),

- fwd_out_format_.rate(),

- sample_rate_hz,

- fwd_in_format_.num_channels(),

- fwd_out_format_.num_channels(),

- num_channels));

+ RETURN_ON_ERR(

+ MaybeInitializeLocked(fwd_in_format_.rate(), fwd_out_format_.rate(),

+ rev_sample_rate_hz, fwd_in_format_.num_channels(),

+ fwd_out_format_.num_channels(), num_channels));

if (samples_per_channel != rev_in_format_.samples_per_channel()) {

return kBadDataLengthError;

}

@@ -697,7 +718,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,

#endif

render_audio_->CopyFrom(data, samples_per_channel, layout);

- return AnalyzeReverseStreamLocked();

+ return ProcessReverseStreamLocked();

}

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {

@@ -716,7 +737,6 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {

if (frame->sample_rate_hz_ != fwd_in_format_.rate()) {

return kBadSampleRateError;

}

RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),

fwd_out_format_.rate(),

frame->sample_rate_hz_,

@@ -738,23 +758,36 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {

RETURN_ON_ERR(WriteMessageToDebugFile());

}

#endif

render_audio_->DeinterleaveFrom(frame);

- return AnalyzeReverseStreamLocked();

+ RETURN_ON_ERR(ProcessReverseStreamLocked());

+ if (is_rev_processed()) {

+ render_audio_->InterleaveTo(frame, true);

+ }

+ return kNoError;

}

-int AudioProcessingImpl::AnalyzeReverseStreamLocked() {

+int AudioProcessingImpl::ProcessReverseStreamLocked() {

AudioBuffer* ra = render_audio_.get(); // For brevity.

if (rev_proc_format_.rate() == kSampleRate32kHz) {

ra->SplitIntoFrequencyBands();

}

+ if (intelligibility_enabled_) {

+ intelligibility_enhancer_->ProcessRenderAudio(

+ ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());

+ }

RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));

RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));

if (!use_new_agc_) {

RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));

}

+ if (rev_proc_format_.rate() == kSampleRate32kHz && is_rev_processed()) {

aluebs-webrtc 2015/07/29 22:17:10 Maybe this is a good time to make 32kHz and 48kHz

ekm 2015/07/29 23:35:06 I'd prefer to save this for a later cl, since this

aluebs-webrtc 2015/07/30 15:28:07 Leaving it for another CL sounds reasonable. But I

ekm 2015/07/30 21:23:50 Yes. I'm interested to hear what the difference so

+ ra->MergeFrequencyBands();

+ }

return kNoError;

}

@@ -969,6 +1002,10 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {

return false;

}

+bool AudioProcessingImpl::is_rev_processed() const {

+ return intelligibility_enabled_ && intelligibility_enhancer_->active();

Andrew MacDonald 2015/07/29 03:52:27 Hmm, checking active() is nice to save some comple

ekm 2015/07/29 23:35:06 Yep, we have this smoothing feature implemented, a

+}

void AudioProcessingImpl::InitializeExperimentalAgc() {

if (use_new_agc_) {

if (!agc_manager_.get()) {

@@ -1001,6 +1038,16 @@ void AudioProcessingImpl::InitializeBeamformer() {

}

+void AudioProcessingImpl::InitializeIntelligibility() {

+ if (intelligibility_enabled_) {

+ IntelligibilityEnhancer::Config config;

+ config.sample_rate_hz = split_rate_;

+ config.num_capture_channels = capture_audio_->num_channels();

+ config.num_render_channels = render_audio_->num_channels();

+ intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));

+ }

+}

void AudioProcessingImpl::MaybeUpdateHistograms() {

static const int kMinDiffDelayMs = 60;