 Chromium Code Reviews
 Chromium Code Reviews Issue 1234463003:
  Integrate Intelligibility with APM  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master
    
  
    Issue 1234463003:
  Integrate Intelligibility with APM  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master
| Index: webrtc/modules/audio_processing/audio_processing_impl.cc | 
| diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc | 
| index bbfb771182cc7196f0e8e0ca9fb63860617d6b8f..47dc30df1ff73e543c74d5c7d9f2aae994f122fb 100644 | 
| --- a/webrtc/modules/audio_processing/audio_processing_impl.cc | 
| +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc | 
| @@ -15,8 +15,9 @@ | 
| #include "webrtc/base/checks.h" | 
| #include "webrtc/base/platform_file.h" | 
| -#include "webrtc/common_audio/include/audio_util.h" | 
| +#include "webrtc/common_audio/audio_converter.h" | 
| #include "webrtc/common_audio/channel_buffer.h" | 
| +#include "webrtc/common_audio/include/audio_util.h" | 
| #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 
| extern "C" { | 
| #include "webrtc/modules/audio_processing/aec/aec_core.h" | 
| @@ -29,6 +30,7 @@ extern "C" { | 
| #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" | 
| #include "webrtc/modules/audio_processing/gain_control_impl.h" | 
| #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" | 
| +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" | 
| #include "webrtc/modules/audio_processing/level_estimator_impl.h" | 
| #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | 
| #include "webrtc/modules/audio_processing/processing_component.h" | 
| @@ -184,6 +186,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, | 
| #endif | 
| api_format_({{{kSampleRate16kHz, 1, false}, | 
| {kSampleRate16kHz, 1, false}, | 
| + {kSampleRate16kHz, 1, false}, | 
| {kSampleRate16kHz, 1, false}}}), | 
| fwd_proc_format_(kSampleRate16kHz), | 
| rev_proc_format_(kSampleRate16kHz, 1), | 
| @@ -210,7 +213,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, | 
| #endif | 
| beamformer_enabled_(config.Get<Beamforming>().enabled), | 
| beamformer_(beamformer), | 
| - array_geometry_(config.Get<Beamforming>().array_geometry) { | 
| + array_geometry_(config.Get<Beamforming>().array_geometry), | 
| + intelligibility_enabled_(config.Get<Intelligibility>().enabled) { | 
| echo_cancellation_ = new EchoCancellationImpl(this, crit_); | 
| component_list_.push_back(echo_cancellation_); | 
| @@ -282,11 +286,17 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz, | 
| ChannelLayout output_layout, | 
| ChannelLayout reverse_layout) { | 
| const ProcessingConfig processing_config = { | 
| - {{input_sample_rate_hz, ChannelsFromLayout(input_layout), | 
| + {{input_sample_rate_hz, | 
| + ChannelsFromLayout(input_layout), | 
| LayoutHasKeyboard(input_layout)}, | 
| - {output_sample_rate_hz, ChannelsFromLayout(output_layout), | 
| + {output_sample_rate_hz, | 
| + ChannelsFromLayout(output_layout), | 
| LayoutHasKeyboard(output_layout)}, | 
| - {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout), | 
| + {reverse_sample_rate_hz, | 
| + ChannelsFromLayout(reverse_layout), | 
| + LayoutHasKeyboard(reverse_layout)}, | 
| + {reverse_sample_rate_hz, | 
| + ChannelsFromLayout(reverse_layout), | 
| LayoutHasKeyboard(reverse_layout)}}}; | 
| return Initialize(processing_config); | 
| @@ -301,14 +311,28 @@ int AudioProcessingImpl::InitializeLocked() { | 
| const int fwd_audio_buffer_channels = | 
| beamformer_enabled_ ? api_format_.input_stream().num_channels() | 
| : api_format_.output_stream().num_channels(); | 
| - if (api_format_.reverse_stream().num_channels() > 0) { | 
| + const int rev_audio_buffer_out_num_frames = | 
| + api_format_.reverse_output_stream().num_frames() == 0 | 
| + ? rev_proc_format_.num_frames() | 
| + : api_format_.reverse_output_stream().num_frames(); | 
| + if (api_format_.reverse_input_stream().num_channels() > 0) { | 
| render_audio_.reset(new AudioBuffer( | 
| - api_format_.reverse_stream().num_frames(), | 
| - api_format_.reverse_stream().num_channels(), | 
| + api_format_.reverse_input_stream().num_frames(), | 
| + api_format_.reverse_input_stream().num_channels(), | 
| rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), | 
| - rev_proc_format_.num_frames())); | 
| + rev_audio_buffer_out_num_frames)); | 
| + if (rev_conversion_needed()) { | 
| + render_converter_ = AudioConverter::Create( | 
| + api_format_.reverse_input_stream().num_channels(), | 
| + api_format_.reverse_input_stream().num_frames(), | 
| + api_format_.reverse_output_stream().num_channels(), | 
| + api_format_.reverse_output_stream().num_frames()); | 
| + } else { | 
| + render_converter_.reset(nullptr); | 
| + } | 
| } else { | 
| render_audio_.reset(nullptr); | 
| + render_converter_.reset(nullptr); | 
| } | 
| capture_audio_.reset(new AudioBuffer( | 
| api_format_.input_stream().num_frames(), | 
| @@ -329,6 +353,8 @@ int AudioProcessingImpl::InitializeLocked() { | 
| InitializeBeamformer(); | 
| + InitializeIntelligibility(); | 
| + | 
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP | 
| if (debug_file_->Open()) { | 
| int err = WriteInitMessage(); | 
| @@ -396,7 +422,8 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { | 
| // ...the forward stream is at 8 kHz. | 
| rev_proc_rate = kSampleRate8kHz; | 
| } else { | 
| - if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) { | 
| + if (api_format_.reverse_input_stream().sample_rate_hz() == | 
| + kSampleRate32kHz) { | 
| // ...or the input is at 32 kHz, in which case we use the splitting | 
| // filter rather than the resampler. | 
| rev_proc_rate = kSampleRate32kHz; | 
| @@ -623,6 +650,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { | 
| MaybeUpdateHistograms(); | 
| AudioBuffer* ca = capture_audio_.get(); // For brevity. | 
| + | 
| if (use_new_agc_ && gain_control_->is_enabled()) { | 
| agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), | 
| fwd_proc_format_.num_frames()); | 
| @@ -633,6 +661,11 @@ int AudioProcessingImpl::ProcessStreamLocked() { | 
| ca->SplitIntoFrequencyBands(); | 
| } | 
| + if (intelligibility_enabled_) { | 
| + intelligibility_enhancer_->AnalyzeCaptureAudio( | 
| + ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels()); | 
| + } | 
| + | 
| if (beamformer_enabled_) { | 
| beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); | 
| ca->set_num_channels(1); | 
| @@ -683,50 +716,82 @@ int AudioProcessingImpl::ProcessStreamLocked() { | 
| int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, | 
| int samples_per_channel, | 
| - int sample_rate_hz, | 
| + int rev_sample_rate_hz, | 
| ChannelLayout layout) { | 
| const StreamConfig reverse_config = { | 
| - sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), | 
| + rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), | 
| }; | 
| if (samples_per_channel != reverse_config.num_frames()) { | 
| return kBadDataLengthError; | 
| } | 
| - return AnalyzeReverseStream(data, reverse_config); | 
| + return AnalyzeReverseStream(data, reverse_config, reverse_config, data); | 
| +} | 
| + | 
| +int AudioProcessingImpl::ProcessReverseStream( | 
| + const float* const* src, | 
| + const StreamConfig& reverse_input_config, | 
| + const StreamConfig& reverse_output_config, | 
| + float* const* dest) { | 
| + RETURN_ON_ERR(AnalyzeReverseStream(src, reverse_input_config, | 
| + reverse_output_config, dest)); | 
| + if (is_rev_processed()) { | 
| + render_audio_->CopyTo(api_format_.reverse_output_stream(), dest); | 
| + } else if (rev_conversion_needed()) { | 
| + render_converter_->Convert(src, reverse_input_config.num_frames(), | 
| + dest, reverse_output_config.num_frames()); | 
| + } else { | 
| + CopyAudio<float>(src, reverse_input_config.num_frames(), | 
| 
Andrew MacDonald
2015/08/05 06:18:18
You shouldn't need the explicit float; can be deduced.
 
ekm
2015/08/07 06:06:00
Done.
 | 
| + reverse_input_config.num_channels(), dest); | 
| + } | 
| + | 
| + return kNoError; | 
| } | 
| int AudioProcessingImpl::AnalyzeReverseStream( | 
| - const float* const* data, | 
| - const StreamConfig& reverse_config) { | 
| + const float* const* src, | 
| + const StreamConfig& reverse_input_config, | 
| + const StreamConfig& reverse_output_config, | 
| + const float* const* dest) { | 
| CriticalSectionScoped crit_scoped(crit_); | 
| - if (data == NULL) { | 
| + if (src == NULL) { | 
| return kNullPointerError; | 
| } | 
| - if (reverse_config.num_channels() <= 0) { | 
| + if (reverse_input_config.num_channels() <= 0) { | 
| return kBadNumberChannelsError; | 
| } | 
| ProcessingConfig processing_config = api_format_; | 
| - processing_config.reverse_stream() = reverse_config; | 
| + processing_config.reverse_input_stream() = reverse_input_config; | 
| + processing_config.reverse_output_stream() = reverse_output_config; | 
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); | 
| - assert(reverse_config.num_frames() == | 
| - api_format_.reverse_stream().num_frames()); | 
| + assert(reverse_input_config.num_frames() == | 
| + api_format_.reverse_input_stream().num_frames()); | 
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP | 
| if (debug_file_->Open()) { | 
| event_msg_->set_type(audioproc::Event::REVERSE_STREAM); | 
| audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); | 
| const size_t channel_size = | 
| - sizeof(float) * api_format_.reverse_stream().num_frames(); | 
| - for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i) | 
| - msg->add_channel(data[i], channel_size); | 
| + sizeof(float) * api_format_.reverse_input_stream().num_frames(); | 
| + for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i) | 
| + msg->add_channel(src[i], channel_size); | 
| RETURN_ON_ERR(WriteMessageToDebugFile()); | 
| } | 
| #endif | 
| - render_audio_->CopyFrom(data, api_format_.reverse_stream()); | 
| - return AnalyzeReverseStreamLocked(); | 
| + render_audio_->CopyFrom(src, api_format_.reverse_input_stream()); | 
| + return ProcessReverseStreamLocked(); | 
| +} | 
| + | 
| +int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { | 
| + RETURN_ON_ERR(AnalyzeReverseStream(frame)); | 
| + if (is_rev_processed()) { | 
| + render_audio_->InterleaveTo(frame, true); | 
| + } | 
| + | 
| + return kNoError; | 
| } | 
| int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { | 
| @@ -751,12 +816,18 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { | 
| } | 
| ProcessingConfig processing_config = api_format_; | 
| - processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_); | 
| - processing_config.reverse_stream().set_num_channels(frame->num_channels_); | 
| + processing_config.reverse_input_stream().set_sample_rate_hz( | 
| + frame->sample_rate_hz_); | 
| + processing_config.reverse_input_stream().set_num_channels( | 
| + frame->num_channels_); | 
| + processing_config.reverse_output_stream().set_sample_rate_hz( | 
| + frame->sample_rate_hz_); | 
| + processing_config.reverse_output_stream().set_num_channels( | 
| + frame->num_channels_); | 
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); | 
| if (frame->samples_per_channel_ != | 
| - api_format_.reverse_stream().num_frames()) { | 
| + api_format_.reverse_input_stream().num_frames()) { | 
| return kBadDataLengthError; | 
| } | 
| @@ -770,23 +841,32 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { | 
| RETURN_ON_ERR(WriteMessageToDebugFile()); | 
| } | 
| #endif | 
| - | 
| render_audio_->DeinterleaveFrom(frame); | 
| - return AnalyzeReverseStreamLocked(); | 
| + return ProcessReverseStreamLocked(); | 
| } | 
| -int AudioProcessingImpl::AnalyzeReverseStreamLocked() { | 
| +int AudioProcessingImpl::ProcessReverseStreamLocked() { | 
| AudioBuffer* ra = render_audio_.get(); // For brevity. | 
| if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { | 
| ra->SplitIntoFrequencyBands(); | 
| } | 
| + if (intelligibility_enabled_) { | 
| + intelligibility_enhancer_->ProcessRenderAudio( | 
| + ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels()); | 
| + } | 
| + | 
| RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); | 
| RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); | 
| if (!use_new_agc_) { | 
| RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); | 
| } | 
| + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz && | 
| + is_rev_processed()) { | 
| + ra->MergeFrequencyBands(); | 
| + } | 
| + | 
| return kNoError; | 
| } | 
| @@ -1003,6 +1083,20 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { | 
| return false; | 
| } | 
| +bool AudioProcessingImpl::is_rev_processed() const { | 
| + return intelligibility_enabled_ && intelligibility_enhancer_->active(); | 
| +} | 
| + | 
| +bool AudioProcessingImpl::rev_conversion_needed() const { | 
| + // Only supports conversions supported by AudioConverter. | 
| + return ((api_format_.reverse_input_stream() != | 
| + api_format_.reverse_output_stream()) && | 
| + ((api_format_.reverse_output_stream().num_channels() == | 
| + api_format_.reverse_input_stream().num_channels()) || | 
| + (api_format_.reverse_output_stream().num_channels() == 1) || | 
| + (api_format_.reverse_input_stream().num_channels() == 1))); | 
| +} | 
| + | 
| void AudioProcessingImpl::InitializeExperimentalAgc() { | 
| if (use_new_agc_) { | 
| if (!agc_manager_.get()) { | 
| @@ -1035,6 +1129,16 @@ void AudioProcessingImpl::InitializeBeamformer() { | 
| } | 
| } | 
| +void AudioProcessingImpl::InitializeIntelligibility() { | 
| + if (intelligibility_enabled_) { | 
| + IntelligibilityEnhancer::Config config; | 
| + config.sample_rate_hz = split_rate_; | 
| + config.num_capture_channels = capture_audio_->num_channels(); | 
| + config.num_render_channels = render_audio_->num_channels(); | 
| + intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config)); | 
| + } | 
| +} | 
| + | 
| void AudioProcessingImpl::MaybeUpdateHistograms() { | 
| static const int kMinDiffDelayMs = 60; | 
| @@ -1133,9 +1237,12 @@ int AudioProcessingImpl::WriteInitMessage() { | 
| msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); | 
| msg->set_num_input_channels(api_format_.input_stream().num_channels()); | 
| msg->set_num_output_channels(api_format_.output_stream().num_channels()); | 
| - msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels()); | 
| - msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz()); | 
| + msg->set_num_reverse_channels( | 
| + api_format_.reverse_input_stream().num_channels()); | 
| + msg->set_reverse_sample_rate( | 
| + api_format_.reverse_input_stream().sample_rate_hz()); | 
| msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); | 
| + // TODO(ekmeyerson): Add reverse output fields to event_msg_. | 
| int err = WriteMessageToDebugFile(); | 
| if (err != kNoError) { |