Chromium Code Reviews| Index: webrtc/modules/audio_processing/audio_processing_impl.cc |
| diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc |
| index bbfb771182cc7196f0e8e0ca9fb63860617d6b8f..f1835404f6cc24e1f98d228ef7065c452ebeeb82 100644 |
| --- a/webrtc/modules/audio_processing/audio_processing_impl.cc |
| +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc |
| @@ -7,7 +7,6 @@ |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| - |
| #include "webrtc/modules/audio_processing/audio_processing_impl.h" |
| #include <assert.h> |
| @@ -15,8 +14,9 @@ |
| #include "webrtc/base/checks.h" |
| #include "webrtc/base/platform_file.h" |
| -#include "webrtc/common_audio/include/audio_util.h" |
| +#include "webrtc/common_audio/audio_converter.h" |
| #include "webrtc/common_audio/channel_buffer.h" |
| +#include "webrtc/common_audio/include/audio_util.h" |
| #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| extern "C" { |
| #include "webrtc/modules/audio_processing/aec/aec_core.h" |
| @@ -29,6 +29,7 @@ extern "C" { |
| #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" |
| #include "webrtc/modules/audio_processing/gain_control_impl.h" |
| #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" |
| +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" |
| #include "webrtc/modules/audio_processing/level_estimator_impl.h" |
| #include "webrtc/modules/audio_processing/noise_suppression_impl.h" |
| #include "webrtc/modules/audio_processing/processing_component.h" |
| @@ -184,6 +185,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, |
| #endif |
| api_format_({{{kSampleRate16kHz, 1, false}, |
| {kSampleRate16kHz, 1, false}, |
| + {kSampleRate16kHz, 1, false}, |
| {kSampleRate16kHz, 1, false}}}), |
| fwd_proc_format_(kSampleRate16kHz), |
| rev_proc_format_(kSampleRate16kHz, 1), |
| @@ -210,7 +212,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, |
| #endif |
| beamformer_enabled_(config.Get<Beamforming>().enabled), |
| beamformer_(beamformer), |
| - array_geometry_(config.Get<Beamforming>().array_geometry) { |
| + array_geometry_(config.Get<Beamforming>().array_geometry), |
| + intelligibility_enabled_(config.Get<Intelligibility>().enabled) { |
| echo_cancellation_ = new EchoCancellationImpl(this, crit_); |
| component_list_.push_back(echo_cancellation_); |
| @@ -282,11 +285,17 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz, |
| ChannelLayout output_layout, |
| ChannelLayout reverse_layout) { |
| const ProcessingConfig processing_config = { |
| - {{input_sample_rate_hz, ChannelsFromLayout(input_layout), |
| + {{input_sample_rate_hz, |
| + ChannelsFromLayout(input_layout), |
| LayoutHasKeyboard(input_layout)}, |
| - {output_sample_rate_hz, ChannelsFromLayout(output_layout), |
| + {output_sample_rate_hz, |
| + ChannelsFromLayout(output_layout), |
| LayoutHasKeyboard(output_layout)}, |
| - {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout), |
| + {reverse_sample_rate_hz, |
| + ChannelsFromLayout(reverse_layout), |
| + LayoutHasKeyboard(reverse_layout)}, |
| + {reverse_sample_rate_hz, |
| + ChannelsFromLayout(reverse_layout), |
| LayoutHasKeyboard(reverse_layout)}}}; |
| return Initialize(processing_config); |
| @@ -301,14 +310,28 @@ int AudioProcessingImpl::InitializeLocked() { |
| const int fwd_audio_buffer_channels = |
| beamformer_enabled_ ? api_format_.input_stream().num_channels() |
| : api_format_.output_stream().num_channels(); |
| - if (api_format_.reverse_stream().num_channels() > 0) { |
| + const int rev_audio_buffer_out_num_frames = |
| + api_format_.reverse_output_stream().num_frames() == 0 |
| + ? rev_proc_format_.num_frames() |
| + : api_format_.reverse_output_stream().num_frames(); |
| + if (api_format_.reverse_input_stream().num_channels() > 0) { |
| render_audio_.reset(new AudioBuffer( |
| - api_format_.reverse_stream().num_frames(), |
| - api_format_.reverse_stream().num_channels(), |
| + api_format_.reverse_input_stream().num_frames(), |
| + api_format_.reverse_input_stream().num_channels(), |
| rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), |
| - rev_proc_format_.num_frames())); |
| + rev_audio_buffer_out_num_frames)); |
| + if (rev_conversion_needed()) { |
| + render_converter_ = AudioConverter::Create( |
| + api_format_.reverse_input_stream().num_channels(), |
| + api_format_.reverse_input_stream().num_frames(), |
| + api_format_.reverse_output_stream().num_channels(), |
| + api_format_.reverse_output_stream().num_frames()); |
| + } else { |
| + render_converter_.reset(nullptr); |
| + } |
| } else { |
| render_audio_.reset(nullptr); |
| + render_converter_.reset(nullptr); |
| } |
| capture_audio_.reset(new AudioBuffer( |
| api_format_.input_stream().num_frames(), |
| @@ -329,6 +352,8 @@ int AudioProcessingImpl::InitializeLocked() { |
| InitializeBeamformer(); |
| + InitializeIntelligibility(); |
| + |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| int err = WriteInitMessage(); |
| @@ -396,7 +421,8 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { |
| // ...the forward stream is at 8 kHz. |
| rev_proc_rate = kSampleRate8kHz; |
| } else { |
| - if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) { |
| + if (api_format_.reverse_input_stream().sample_rate_hz() == |
| + kSampleRate32kHz) { |
| // ...or the input is at 32 kHz, in which case we use the splitting |
| // filter rather than the resampler. |
| rev_proc_rate = kSampleRate32kHz; |
| @@ -623,6 +649,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
| MaybeUpdateHistograms(); |
| AudioBuffer* ca = capture_audio_.get(); // For brevity. |
| + |
| if (use_new_agc_ && gain_control_->is_enabled()) { |
| agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), |
| fwd_proc_format_.num_frames()); |
| @@ -633,6 +660,11 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
| ca->SplitIntoFrequencyBands(); |
| } |
| + if (intelligibility_enabled_) { |
| + intelligibility_enhancer_->AnalyzeCaptureAudio( |
| + ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels()); |
| + } |
| + |
| if (beamformer_enabled_) { |
| beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); |
| ca->set_num_channels(1); |
| @@ -683,50 +715,79 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
| int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, |
| int samples_per_channel, |
| - int sample_rate_hz, |
| + int rev_sample_rate_hz, |
| ChannelLayout layout) { |
| const StreamConfig reverse_config = { |
| - sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), |
| + rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), |
| }; |
| if (samples_per_channel != reverse_config.num_frames()) { |
| return kBadDataLengthError; |
| } |
| - return AnalyzeReverseStream(data, reverse_config); |
| + return AnalyzeReverseStream(data, reverse_config, reverse_config, data); |
| +} |
| + |
| +int AudioProcessingImpl::ProcessReverseStream( |
| + const float* const* src, |
| + const StreamConfig& reverse_input_config, |
| + const StreamConfig& reverse_output_config, |
| + float* const* dest) { |
| + RETURN_ON_ERR(AnalyzeReverseStream(src, reverse_input_config, |
| + reverse_output_config, dest)); |
| + if (is_rev_processed()) { |
| + render_audio_->CopyTo(api_format_.reverse_output_stream(), dest); |
| + } else if (rev_conversion_needed()) { |
| + render_converter_->Convert(src, reverse_input_config.num_frames(), |
| + dest, reverse_output_config.num_frames()); |
| + } |
| + |
| + return kNoError; |
| } |
| int AudioProcessingImpl::AnalyzeReverseStream( |
| - const float* const* data, |
| - const StreamConfig& reverse_config) { |
| + const float* const* src, |
| + const StreamConfig& reverse_input_config, |
| + const StreamConfig& reverse_output_config, |
| + const float* const* dest) { |
| CriticalSectionScoped crit_scoped(crit_); |
| - if (data == NULL) { |
| + if (src == NULL) { |
| return kNullPointerError; |
| } |
| - if (reverse_config.num_channels() <= 0) { |
| + if (reverse_input_config.num_channels() <= 0) { |
| return kBadNumberChannelsError; |
| } |
| ProcessingConfig processing_config = api_format_; |
| - processing_config.reverse_stream() = reverse_config; |
| + processing_config.reverse_input_stream() = reverse_input_config; |
| + processing_config.reverse_output_stream() = reverse_output_config; |
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); |
| - assert(reverse_config.num_frames() == |
| - api_format_.reverse_stream().num_frames()); |
| + assert(reverse_input_config.num_frames() == |
| + api_format_.reverse_input_stream().num_frames()); |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| if (debug_file_->Open()) { |
| event_msg_->set_type(audioproc::Event::REVERSE_STREAM); |
| audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); |
| const size_t channel_size = |
| - sizeof(float) * api_format_.reverse_stream().num_frames(); |
| - for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i) |
| - msg->add_channel(data[i], channel_size); |
| + sizeof(float) * api_format_.reverse_input_stream().num_frames(); |
| + for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i) |
| + msg->add_channel(src[i], channel_size); |
| RETURN_ON_ERR(WriteMessageToDebugFile()); |
| } |
| #endif |
| - render_audio_->CopyFrom(data, api_format_.reverse_stream()); |
| - return AnalyzeReverseStreamLocked(); |
| + render_audio_->CopyFrom(src, api_format_.reverse_input_stream()); |
| + return ProcessReverseStreamLocked(); |
| +} |
| + |
| +int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { |
| + RETURN_ON_ERR(AnalyzeReverseStream(frame)); |
| + if (is_rev_processed()) { |
| + render_audio_->InterleaveTo(frame, true); |
| + } |
| + |
| + return kNoError; |
| } |
| int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
| @@ -751,12 +812,18 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
| } |
| ProcessingConfig processing_config = api_format_; |
| - processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_); |
| - processing_config.reverse_stream().set_num_channels(frame->num_channels_); |
| + processing_config.reverse_input_stream().set_sample_rate_hz( |
| + frame->sample_rate_hz_); |
| + processing_config.reverse_input_stream().set_num_channels( |
| + frame->num_channels_); |
| + processing_config.reverse_output_stream().set_sample_rate_hz( |
| + frame->sample_rate_hz_); |
| + processing_config.reverse_output_stream().set_num_channels( |
| + frame->num_channels_); |
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); |
| if (frame->samples_per_channel_ != |
| - api_format_.reverse_stream().num_frames()) { |
| + api_format_.reverse_input_stream().num_frames()) { |
| return kBadDataLengthError; |
| } |
| @@ -770,23 +837,32 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
| RETURN_ON_ERR(WriteMessageToDebugFile()); |
| } |
| #endif |
| - |
| render_audio_->DeinterleaveFrom(frame); |
| - return AnalyzeReverseStreamLocked(); |
| + return ProcessReverseStreamLocked(); |
| } |
| -int AudioProcessingImpl::AnalyzeReverseStreamLocked() { |
| +int AudioProcessingImpl::ProcessReverseStreamLocked() { |
| AudioBuffer* ra = render_audio_.get(); // For brevity. |
| if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { |
| ra->SplitIntoFrequencyBands(); |
| } |
| + if (intelligibility_enabled_) { |
| + intelligibility_enhancer_->ProcessRenderAudio( |
| + ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels()); |
| + } |
| + |
| RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); |
| RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); |
| if (!use_new_agc_) { |
| RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); |
| } |
| + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz && |
| + is_rev_processed()) { |
| + ra->MergeFrequencyBands(); |
| + } |
| + |
| return kNoError; |
| } |
| @@ -1003,6 +1079,20 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { |
| return false; |
| } |
| +bool AudioProcessingImpl::is_rev_processed() const { |
| + return intelligibility_enabled_ && intelligibility_enhancer_->active(); |
| +} |
| + |
| +bool AudioProcessingImpl::rev_conversion_needed() const { |
| + // Only supports conversions supported by AudioConverter. |
| + return ((api_format_.reverse_input_stream() != |
| + api_format_.reverse_output_stream()) && |
| + ((api_format_.reverse_output_stream().num_channels() == |

[Review thread on these constraints — comments truncated by page extraction:]
ekm (2015/08/01 04:21:37): Added these constraints because CommonFormats/AudioConverter […]
aluebs-webrtc (2015/08/03 16:30:50): I think 0 channels just means it doesn't need any […]
Andrew MacDonald (2015/08/05 06:22:39): If we want to be consistent, I think the test is […]
ekm (2015/08/07 06:05:59): Done.
| + api_format_.reverse_input_stream().num_channels()) || |
| + (api_format_.reverse_output_stream().num_channels() == 1) || |
| + (api_format_.reverse_input_stream().num_channels() == 1))); |
| +} |
| + |
| void AudioProcessingImpl::InitializeExperimentalAgc() { |
| if (use_new_agc_) { |
| if (!agc_manager_.get()) { |
| @@ -1035,6 +1125,16 @@ void AudioProcessingImpl::InitializeBeamformer() { |
| } |
| } |
| +void AudioProcessingImpl::InitializeIntelligibility() { |
| + if (intelligibility_enabled_) { |
| + IntelligibilityEnhancer::Config config; |
| + config.sample_rate_hz = split_rate_; |
| + config.num_capture_channels = capture_audio_->num_channels(); |
| + config.num_render_channels = render_audio_->num_channels(); |
| + intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config)); |
| + } |
| +} |
| + |
| void AudioProcessingImpl::MaybeUpdateHistograms() { |
| static const int kMinDiffDelayMs = 60; |
| @@ -1133,9 +1233,12 @@ int AudioProcessingImpl::WriteInitMessage() { |
| msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); |
| msg->set_num_input_channels(api_format_.input_stream().num_channels()); |
| msg->set_num_output_channels(api_format_.output_stream().num_channels()); |
| - msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels()); |
| - msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz()); |
| + msg->set_num_reverse_channels( |
| + api_format_.reverse_input_stream().num_channels()); |
| + msg->set_reverse_sample_rate( |
| + api_format_.reverse_input_stream().sample_rate_hz()); |
| msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); |
| + // TODO(ekmeyerson): Add reverse output fields to event_msg_. |
| int err = WriteMessageToDebugFile(); |
| if (err != kNoError) { |