| Index: webrtc/modules/audio_processing/audio_processing_impl.cc
|
| diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
|
| index 81d6c70be9131a4a3e263f6738e95369acdd3327..c9e4ddc10c5f2aa7a330dc283ea5cd114161075f 100644
|
| --- a/webrtc/modules/audio_processing/audio_processing_impl.cc
|
| +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
|
| @@ -15,8 +15,9 @@
|
|
|
| #include "webrtc/base/checks.h"
|
| #include "webrtc/base/platform_file.h"
|
| -#include "webrtc/common_audio/include/audio_util.h"
|
| +#include "webrtc/common_audio/audio_converter.h"
|
| #include "webrtc/common_audio/channel_buffer.h"
|
| +#include "webrtc/common_audio/include/audio_util.h"
|
| #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
| extern "C" {
|
| #include "webrtc/modules/audio_processing/aec/aec_core.h"
|
| @@ -29,6 +30,7 @@ extern "C" {
|
| #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
|
| #include "webrtc/modules/audio_processing/gain_control_impl.h"
|
| #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
|
| +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
| #include "webrtc/modules/audio_processing/level_estimator_impl.h"
|
| #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
|
| #include "webrtc/modules/audio_processing/processing_component.h"
|
| @@ -184,6 +186,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
| #endif
|
| api_format_({{{kSampleRate16kHz, 1, false},
|
| {kSampleRate16kHz, 1, false},
|
| + {kSampleRate16kHz, 1, false},
|
| {kSampleRate16kHz, 1, false}}}),
|
| fwd_proc_format_(kSampleRate16kHz),
|
| rev_proc_format_(kSampleRate16kHz, 1),
|
| @@ -210,7 +213,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
| #endif
|
| beamformer_enabled_(config.Get<Beamforming>().enabled),
|
| beamformer_(beamformer),
|
| - array_geometry_(config.Get<Beamforming>().array_geometry) {
|
| + array_geometry_(config.Get<Beamforming>().array_geometry),
|
| + intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
|
| echo_cancellation_ = new EchoCancellationImpl(this, crit_);
|
| component_list_.push_back(echo_cancellation_);
|
|
|
| @@ -282,11 +286,17 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
|
| ChannelLayout output_layout,
|
| ChannelLayout reverse_layout) {
|
| const ProcessingConfig processing_config = {
|
| - {{input_sample_rate_hz, ChannelsFromLayout(input_layout),
|
| + {{input_sample_rate_hz,
|
| + ChannelsFromLayout(input_layout),
|
| LayoutHasKeyboard(input_layout)},
|
| - {output_sample_rate_hz, ChannelsFromLayout(output_layout),
|
| + {output_sample_rate_hz,
|
| + ChannelsFromLayout(output_layout),
|
| LayoutHasKeyboard(output_layout)},
|
| - {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout),
|
| + {reverse_sample_rate_hz,
|
| + ChannelsFromLayout(reverse_layout),
|
| + LayoutHasKeyboard(reverse_layout)},
|
| + {reverse_sample_rate_hz,
|
| + ChannelsFromLayout(reverse_layout),
|
| LayoutHasKeyboard(reverse_layout)}}};
|
|
|
| return Initialize(processing_config);
|
| @@ -301,14 +311,28 @@ int AudioProcessingImpl::InitializeLocked() {
|
| const int fwd_audio_buffer_channels =
|
| beamformer_enabled_ ? api_format_.input_stream().num_channels()
|
| : api_format_.output_stream().num_channels();
|
| - if (api_format_.reverse_stream().num_channels() > 0) {
|
| + const int rev_audio_buffer_out_num_frames =
|
| + api_format_.reverse_output_stream().num_frames() == 0
|
| + ? rev_proc_format_.num_frames()
|
| + : api_format_.reverse_output_stream().num_frames();
|
| + if (api_format_.reverse_input_stream().num_channels() > 0) {
|
| render_audio_.reset(new AudioBuffer(
|
| - api_format_.reverse_stream().num_frames(),
|
| - api_format_.reverse_stream().num_channels(),
|
| + api_format_.reverse_input_stream().num_frames(),
|
| + api_format_.reverse_input_stream().num_channels(),
|
| rev_proc_format_.num_frames(), rev_proc_format_.num_channels(),
|
| - rev_proc_format_.num_frames()));
|
| + rev_audio_buffer_out_num_frames));
|
| + if (rev_conversion_needed()) {
|
| + render_converter_ = AudioConverter::Create(
|
| + api_format_.reverse_input_stream().num_channels(),
|
| + api_format_.reverse_input_stream().num_frames(),
|
| + api_format_.reverse_output_stream().num_channels(),
|
| + api_format_.reverse_output_stream().num_frames());
|
| + } else {
|
| + render_converter_.reset(nullptr);
|
| + }
|
| } else {
|
| render_audio_.reset(nullptr);
|
| + render_converter_.reset(nullptr);
|
| }
|
| capture_audio_.reset(new AudioBuffer(
|
| api_format_.input_stream().num_frames(),
|
| @@ -329,6 +353,8 @@ int AudioProcessingImpl::InitializeLocked() {
|
|
|
| InitializeBeamformer();
|
|
|
| + InitializeIntelligibility();
|
| +
|
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
| if (debug_file_->Open()) {
|
| int err = WriteInitMessage();
|
| @@ -396,7 +422,8 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
|
| // ...the forward stream is at 8 kHz.
|
| rev_proc_rate = kSampleRate8kHz;
|
| } else {
|
| - if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) {
|
| + if (api_format_.reverse_input_stream().sample_rate_hz() ==
|
| + kSampleRate32kHz) {
|
| // ...or the input is at 32 kHz, in which case we use the splitting
|
| // filter rather than the resampler.
|
| rev_proc_rate = kSampleRate32kHz;
|
| @@ -624,6 +651,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
| MaybeUpdateHistograms();
|
|
|
| AudioBuffer* ca = capture_audio_.get(); // For brevity.
|
| +
|
| if (use_new_agc_ && gain_control_->is_enabled()) {
|
| agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
|
| fwd_proc_format_.num_frames());
|
| @@ -634,6 +662,11 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
| ca->SplitIntoFrequencyBands();
|
| }
|
|
|
| + if (intelligibility_enabled_) {
|
| + intelligibility_enhancer_->AnalyzeCaptureAudio(
|
| + ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
|
| + }
|
| +
|
| if (beamformer_enabled_) {
|
| beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
|
| ca->set_num_channels(1);
|
| @@ -684,50 +717,81 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
|
|
| int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
|
| int samples_per_channel,
|
| - int sample_rate_hz,
|
| + int rev_sample_rate_hz,
|
| ChannelLayout layout) {
|
| const StreamConfig reverse_config = {
|
| - sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
|
| + rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
|
| };
|
| if (samples_per_channel != reverse_config.num_frames()) {
|
| return kBadDataLengthError;
|
| }
|
| - return AnalyzeReverseStream(data, reverse_config);
|
| + return AnalyzeReverseStream(data, reverse_config, reverse_config);
|
| +}
|
| +
|
| +int AudioProcessingImpl::ProcessReverseStream(
|
| + const float* const* src,
|
| + const StreamConfig& reverse_input_config,
|
| + const StreamConfig& reverse_output_config,
|
| + float* const* dest) {
|
| + RETURN_ON_ERR(
|
| + AnalyzeReverseStream(src, reverse_input_config, reverse_output_config));
|
| + if (is_rev_processed()) {
|
| + render_audio_->CopyTo(api_format_.reverse_output_stream(), dest);
|
| + } else if (rev_conversion_needed()) {
|
| + render_converter_->Convert(src, reverse_input_config.num_samples(), dest,
|
| + reverse_output_config.num_samples());
|
| + } else {
|
| + CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
|
| + reverse_input_config.num_channels(), dest);
|
| + }
|
| +
|
| + return kNoError;
|
| }
|
|
|
| int AudioProcessingImpl::AnalyzeReverseStream(
|
| - const float* const* data,
|
| - const StreamConfig& reverse_config) {
|
| + const float* const* src,
|
| + const StreamConfig& reverse_input_config,
|
| + const StreamConfig& reverse_output_config) {
|
| CriticalSectionScoped crit_scoped(crit_);
|
| - if (data == NULL) {
|
| + if (src == NULL) {
|
| return kNullPointerError;
|
| }
|
|
|
| - if (reverse_config.num_channels() <= 0) {
|
| + if (reverse_input_config.num_channels() <= 0) {
|
| return kBadNumberChannelsError;
|
| }
|
|
|
| ProcessingConfig processing_config = api_format_;
|
| - processing_config.reverse_stream() = reverse_config;
|
| + processing_config.reverse_input_stream() = reverse_input_config;
|
| + processing_config.reverse_output_stream() = reverse_output_config;
|
|
|
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
|
| - assert(reverse_config.num_frames() ==
|
| - api_format_.reverse_stream().num_frames());
|
| + assert(reverse_input_config.num_frames() ==
|
| + api_format_.reverse_input_stream().num_frames());
|
|
|
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
| if (debug_file_->Open()) {
|
| event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
|
| audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
|
| const size_t channel_size =
|
| - sizeof(float) * api_format_.reverse_stream().num_frames();
|
| - for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
|
| - msg->add_channel(data[i], channel_size);
|
| + sizeof(float) * api_format_.reverse_input_stream().num_frames();
|
| + for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i)
|
| + msg->add_channel(src[i], channel_size);
|
| RETURN_ON_ERR(WriteMessageToDebugFile());
|
| }
|
| #endif
|
|
|
| - render_audio_->CopyFrom(data, api_format_.reverse_stream());
|
| - return AnalyzeReverseStreamLocked();
|
| + render_audio_->CopyFrom(src, api_format_.reverse_input_stream());
|
| + return ProcessReverseStreamLocked();
|
| +}
|
| +
|
| +int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
|
| + RETURN_ON_ERR(AnalyzeReverseStream(frame));
|
| + if (is_rev_processed()) {
|
| + render_audio_->InterleaveTo(frame, true);
|
| + }
|
| +
|
| + return kNoError;
|
| }
|
|
|
| int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
|
| @@ -752,12 +816,18 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
|
| }
|
|
|
| ProcessingConfig processing_config = api_format_;
|
| - processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_);
|
| - processing_config.reverse_stream().set_num_channels(frame->num_channels_);
|
| + processing_config.reverse_input_stream().set_sample_rate_hz(
|
| + frame->sample_rate_hz_);
|
| + processing_config.reverse_input_stream().set_num_channels(
|
| + frame->num_channels_);
|
| + processing_config.reverse_output_stream().set_sample_rate_hz(
|
| + frame->sample_rate_hz_);
|
| + processing_config.reverse_output_stream().set_num_channels(
|
| + frame->num_channels_);
|
|
|
| RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
|
| if (frame->samples_per_channel_ !=
|
| - api_format_.reverse_stream().num_frames()) {
|
| + api_format_.reverse_input_stream().num_frames()) {
|
| return kBadDataLengthError;
|
| }
|
|
|
| @@ -771,23 +841,32 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
|
| RETURN_ON_ERR(WriteMessageToDebugFile());
|
| }
|
| #endif
|
| -
|
| render_audio_->DeinterleaveFrom(frame);
|
| - return AnalyzeReverseStreamLocked();
|
| + return ProcessReverseStreamLocked();
|
| }
|
|
|
| -int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
|
| +int AudioProcessingImpl::ProcessReverseStreamLocked() {
|
| AudioBuffer* ra = render_audio_.get(); // For brevity.
|
| if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
|
| ra->SplitIntoFrequencyBands();
|
| }
|
|
|
| + if (intelligibility_enabled_) {
|
| + intelligibility_enhancer_->ProcessRenderAudio(
|
| + ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
|
| + }
|
| +
|
| RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
|
| RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
|
| if (!use_new_agc_) {
|
| RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
|
| }
|
|
|
| + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz &&
|
| + is_rev_processed()) {
|
| + ra->MergeFrequencyBands();
|
| + }
|
| +
|
| return kNoError;
|
| }
|
|
|
| @@ -1004,6 +1083,15 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
|
| return false;
|
| }
|
|
|
| +bool AudioProcessingImpl::is_rev_processed() const {
|
| + return intelligibility_enabled_ && intelligibility_enhancer_->active();
|
| +}
|
| +
|
| +bool AudioProcessingImpl::rev_conversion_needed() const {
|
| + return (api_format_.reverse_input_stream() !=
|
| + api_format_.reverse_output_stream());
|
| +}
|
| +
|
| void AudioProcessingImpl::InitializeExperimentalAgc() {
|
| if (use_new_agc_) {
|
| if (!agc_manager_.get()) {
|
| @@ -1036,6 +1124,16 @@ void AudioProcessingImpl::InitializeBeamformer() {
|
| }
|
| }
|
|
|
| +void AudioProcessingImpl::InitializeIntelligibility() {
|
| + if (intelligibility_enabled_) {
|
| + IntelligibilityEnhancer::Config config;
|
| + config.sample_rate_hz = split_rate_;
|
| + config.num_capture_channels = capture_audio_->num_channels();
|
| + config.num_render_channels = render_audio_->num_channels();
|
| + intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
|
| + }
|
| +}
|
| +
|
| void AudioProcessingImpl::MaybeUpdateHistograms() {
|
| static const int kMinDiffDelayMs = 60;
|
|
|
| @@ -1134,9 +1232,12 @@ int AudioProcessingImpl::WriteInitMessage() {
|
| msg->set_sample_rate(api_format_.input_stream().sample_rate_hz());
|
| msg->set_num_input_channels(api_format_.input_stream().num_channels());
|
| msg->set_num_output_channels(api_format_.output_stream().num_channels());
|
| - msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels());
|
| - msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz());
|
| + msg->set_num_reverse_channels(
|
| + api_format_.reverse_input_stream().num_channels());
|
| + msg->set_reverse_sample_rate(
|
| + api_format_.reverse_input_stream().sample_rate_hz());
|
| msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz());
|
| + // TODO(ekmeyerson): Add reverse output fields to event_msg_.
|
|
|
| int err = WriteMessageToDebugFile();
|
| if (err != kNoError) {
|
|
|