Index: webrtc/modules/audio_processing/audio_processing_impl.cc |
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc |
index bbfb771182cc7196f0e8e0ca9fb63860617d6b8f..47dc30df1ff73e543c74d5c7d9f2aae994f122fb 100644 |
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc |
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc |
@@ -15,8 +15,9 @@ |
#include "webrtc/base/checks.h" |
#include "webrtc/base/platform_file.h" |
-#include "webrtc/common_audio/include/audio_util.h" |
+#include "webrtc/common_audio/audio_converter.h" |
#include "webrtc/common_audio/channel_buffer.h" |
+#include "webrtc/common_audio/include/audio_util.h" |
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
extern "C" { |
#include "webrtc/modules/audio_processing/aec/aec_core.h" |
@@ -29,6 +30,7 @@ extern "C" { |
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" |
#include "webrtc/modules/audio_processing/gain_control_impl.h" |
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h" |
+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" |
#include "webrtc/modules/audio_processing/level_estimator_impl.h" |
#include "webrtc/modules/audio_processing/noise_suppression_impl.h" |
#include "webrtc/modules/audio_processing/processing_component.h" |
@@ -184,6 +186,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, |
#endif |
api_format_({{{kSampleRate16kHz, 1, false}, |
{kSampleRate16kHz, 1, false}, |
+ {kSampleRate16kHz, 1, false}, |
{kSampleRate16kHz, 1, false}}}), |
fwd_proc_format_(kSampleRate16kHz), |
rev_proc_format_(kSampleRate16kHz, 1), |
@@ -210,7 +213,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, |
#endif |
beamformer_enabled_(config.Get<Beamforming>().enabled), |
beamformer_(beamformer), |
- array_geometry_(config.Get<Beamforming>().array_geometry) { |
+ array_geometry_(config.Get<Beamforming>().array_geometry), |
+ intelligibility_enabled_(config.Get<Intelligibility>().enabled) { |
echo_cancellation_ = new EchoCancellationImpl(this, crit_); |
component_list_.push_back(echo_cancellation_); |
@@ -282,11 +286,17 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz, |
ChannelLayout output_layout, |
ChannelLayout reverse_layout) { |
const ProcessingConfig processing_config = { |
- {{input_sample_rate_hz, ChannelsFromLayout(input_layout), |
+ {{input_sample_rate_hz, |
+ ChannelsFromLayout(input_layout), |
LayoutHasKeyboard(input_layout)}, |
- {output_sample_rate_hz, ChannelsFromLayout(output_layout), |
+ {output_sample_rate_hz, |
+ ChannelsFromLayout(output_layout), |
LayoutHasKeyboard(output_layout)}, |
- {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout), |
+ {reverse_sample_rate_hz, |
+ ChannelsFromLayout(reverse_layout), |
+ LayoutHasKeyboard(reverse_layout)}, |
+ {reverse_sample_rate_hz, |
+ ChannelsFromLayout(reverse_layout), |
LayoutHasKeyboard(reverse_layout)}}}; |
return Initialize(processing_config); |
@@ -301,14 +311,28 @@ int AudioProcessingImpl::InitializeLocked() { |
const int fwd_audio_buffer_channels = |
beamformer_enabled_ ? api_format_.input_stream().num_channels() |
: api_format_.output_stream().num_channels(); |
- if (api_format_.reverse_stream().num_channels() > 0) { |
+ const int rev_audio_buffer_out_num_frames = |
+ api_format_.reverse_output_stream().num_frames() == 0 |
+ ? rev_proc_format_.num_frames() |
+ : api_format_.reverse_output_stream().num_frames(); |
+ if (api_format_.reverse_input_stream().num_channels() > 0) { |
render_audio_.reset(new AudioBuffer( |
- api_format_.reverse_stream().num_frames(), |
- api_format_.reverse_stream().num_channels(), |
+ api_format_.reverse_input_stream().num_frames(), |
+ api_format_.reverse_input_stream().num_channels(), |
rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), |
- rev_proc_format_.num_frames())); |
+ rev_audio_buffer_out_num_frames)); |
+ if (rev_conversion_needed()) { |
+ render_converter_ = AudioConverter::Create( |
+ api_format_.reverse_input_stream().num_channels(), |
+ api_format_.reverse_input_stream().num_frames(), |
+ api_format_.reverse_output_stream().num_channels(), |
+ api_format_.reverse_output_stream().num_frames()); |
+ } else { |
+ render_converter_.reset(nullptr); |
+ } |
} else { |
render_audio_.reset(nullptr); |
+ render_converter_.reset(nullptr); |
} |
capture_audio_.reset(new AudioBuffer( |
api_format_.input_stream().num_frames(), |
@@ -329,6 +353,8 @@ int AudioProcessingImpl::InitializeLocked() { |
InitializeBeamformer(); |
+ InitializeIntelligibility(); |
+ |
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
if (debug_file_->Open()) { |
int err = WriteInitMessage(); |
@@ -396,7 +422,8 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { |
// ...the forward stream is at 8 kHz. |
rev_proc_rate = kSampleRate8kHz; |
} else { |
- if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) { |
+ if (api_format_.reverse_input_stream().sample_rate_hz() == |
+ kSampleRate32kHz) { |
// ...or the input is at 32 kHz, in which case we use the splitting |
// filter rather than the resampler. |
rev_proc_rate = kSampleRate32kHz; |
@@ -623,6 +650,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
MaybeUpdateHistograms(); |
AudioBuffer* ca = capture_audio_.get(); // For brevity. |
+ |
if (use_new_agc_ && gain_control_->is_enabled()) { |
agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), |
fwd_proc_format_.num_frames()); |
@@ -633,6 +661,11 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
ca->SplitIntoFrequencyBands(); |
} |
+ if (intelligibility_enabled_) { |
+ intelligibility_enhancer_->AnalyzeCaptureAudio( |
+ ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels()); |
+ } |
+ |
if (beamformer_enabled_) { |
beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); |
ca->set_num_channels(1); |
@@ -683,50 +716,82 @@ int AudioProcessingImpl::ProcessStreamLocked() { |
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, |
int samples_per_channel, |
- int sample_rate_hz, |
+ int rev_sample_rate_hz, |
ChannelLayout layout) { |
const StreamConfig reverse_config = { |
- sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), |
+ rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), |
}; |
if (samples_per_channel != reverse_config.num_frames()) { |
return kBadDataLengthError; |
} |
- return AnalyzeReverseStream(data, reverse_config); |
+ return AnalyzeReverseStream(data, reverse_config, reverse_config, data); |
+} |
+ |
+int AudioProcessingImpl::ProcessReverseStream( |
+ const float* const* src, |
+ const StreamConfig& reverse_input_config, |
+ const StreamConfig& reverse_output_config, |
+ float* const* dest) { |
+ RETURN_ON_ERR(AnalyzeReverseStream(src, reverse_input_config, |
+ reverse_output_config, dest)); |
+ if (is_rev_processed()) { |
+ render_audio_->CopyTo(api_format_.reverse_output_stream(), dest); |
+ } else if (rev_conversion_needed()) { |
+ render_converter_->Convert(src, reverse_input_config.num_frames(), |
+ dest, reverse_output_config.num_frames()); |
+ } else { |
+    CopyAudio<float>(src, reverse_input_config.num_frames(), |
+                     reverse_input_config.num_channels(), dest); |
[review] Andrew MacDonald 2015/08/05 06:18:18: You shouldn't need the explicit float; it can be deduced.
[review] ekm 2015/08/07 06:06:00: Done.
+ } |
+ |
+ return kNoError; |
} |
int AudioProcessingImpl::AnalyzeReverseStream( |
- const float* const* data, |
- const StreamConfig& reverse_config) { |
+ const float* const* src, |
+ const StreamConfig& reverse_input_config, |
+ const StreamConfig& reverse_output_config, |
+ const float* const* dest) { |
CriticalSectionScoped crit_scoped(crit_); |
- if (data == NULL) { |
+ if (src == NULL) { |
return kNullPointerError; |
} |
- if (reverse_config.num_channels() <= 0) { |
+ if (reverse_input_config.num_channels() <= 0) { |
return kBadNumberChannelsError; |
} |
ProcessingConfig processing_config = api_format_; |
- processing_config.reverse_stream() = reverse_config; |
+ processing_config.reverse_input_stream() = reverse_input_config; |
+ processing_config.reverse_output_stream() = reverse_output_config; |
RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); |
- assert(reverse_config.num_frames() == |
- api_format_.reverse_stream().num_frames()); |
+ assert(reverse_input_config.num_frames() == |
+ api_format_.reverse_input_stream().num_frames()); |
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
if (debug_file_->Open()) { |
event_msg_->set_type(audioproc::Event::REVERSE_STREAM); |
audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); |
const size_t channel_size = |
- sizeof(float) * api_format_.reverse_stream().num_frames(); |
- for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i) |
- msg->add_channel(data[i], channel_size); |
+ sizeof(float) * api_format_.reverse_input_stream().num_frames(); |
+ for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i) |
+ msg->add_channel(src[i], channel_size); |
RETURN_ON_ERR(WriteMessageToDebugFile()); |
} |
#endif |
- render_audio_->CopyFrom(data, api_format_.reverse_stream()); |
- return AnalyzeReverseStreamLocked(); |
+ render_audio_->CopyFrom(src, api_format_.reverse_input_stream()); |
+ return ProcessReverseStreamLocked(); |
+} |
+ |
+int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { |
+ RETURN_ON_ERR(AnalyzeReverseStream(frame)); |
+ if (is_rev_processed()) { |
+ render_audio_->InterleaveTo(frame, true); |
+ } |
+ |
+ return kNoError; |
} |
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
@@ -751,12 +816,18 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
} |
ProcessingConfig processing_config = api_format_; |
- processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_); |
- processing_config.reverse_stream().set_num_channels(frame->num_channels_); |
+ processing_config.reverse_input_stream().set_sample_rate_hz( |
+ frame->sample_rate_hz_); |
+ processing_config.reverse_input_stream().set_num_channels( |
+ frame->num_channels_); |
+ processing_config.reverse_output_stream().set_sample_rate_hz( |
+ frame->sample_rate_hz_); |
+ processing_config.reverse_output_stream().set_num_channels( |
+ frame->num_channels_); |
RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); |
if (frame->samples_per_channel_ != |
- api_format_.reverse_stream().num_frames()) { |
+ api_format_.reverse_input_stream().num_frames()) { |
return kBadDataLengthError; |
} |
@@ -770,23 +841,32 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { |
RETURN_ON_ERR(WriteMessageToDebugFile()); |
} |
#endif |
- |
render_audio_->DeinterleaveFrom(frame); |
- return AnalyzeReverseStreamLocked(); |
+ return ProcessReverseStreamLocked(); |
} |
-int AudioProcessingImpl::AnalyzeReverseStreamLocked() { |
+int AudioProcessingImpl::ProcessReverseStreamLocked() { |
AudioBuffer* ra = render_audio_.get(); // For brevity. |
if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { |
ra->SplitIntoFrequencyBands(); |
} |
+ if (intelligibility_enabled_) { |
+ intelligibility_enhancer_->ProcessRenderAudio( |
+ ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels()); |
+ } |
+ |
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); |
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); |
if (!use_new_agc_) { |
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); |
} |
+ if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz && |
+ is_rev_processed()) { |
+ ra->MergeFrequencyBands(); |
+ } |
+ |
return kNoError; |
} |
@@ -1003,6 +1083,20 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { |
return false; |
} |
+bool AudioProcessingImpl::is_rev_processed() const { |
+ return intelligibility_enabled_ && intelligibility_enhancer_->active(); |
+} |
+ |
+bool AudioProcessingImpl::rev_conversion_needed() const { |
+ // Only supports conversions supported by AudioConverter. |
+ return ((api_format_.reverse_input_stream() != |
+ api_format_.reverse_output_stream()) && |
+ ((api_format_.reverse_output_stream().num_channels() == |
+ api_format_.reverse_input_stream().num_channels()) || |
+           (api_format_.reverse_output_stream().num_channels() == 1) || |
+           (api_format_.reverse_input_stream().num_channels() == 1))); |
+} |
+ |
void AudioProcessingImpl::InitializeExperimentalAgc() { |
if (use_new_agc_) { |
if (!agc_manager_.get()) { |
@@ -1035,6 +1129,16 @@ void AudioProcessingImpl::InitializeBeamformer() { |
} |
} |
+void AudioProcessingImpl::InitializeIntelligibility() { |
+ if (intelligibility_enabled_) { |
+ IntelligibilityEnhancer::Config config; |
+ config.sample_rate_hz = split_rate_; |
+ config.num_capture_channels = capture_audio_->num_channels(); |
+ config.num_render_channels = render_audio_->num_channels(); |
+ intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config)); |
+ } |
+} |
+ |
void AudioProcessingImpl::MaybeUpdateHistograms() { |
static const int kMinDiffDelayMs = 60; |
@@ -1133,9 +1237,12 @@ int AudioProcessingImpl::WriteInitMessage() { |
msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); |
msg->set_num_input_channels(api_format_.input_stream().num_channels()); |
msg->set_num_output_channels(api_format_.output_stream().num_channels()); |
- msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels()); |
- msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz()); |
+ msg->set_num_reverse_channels( |
+ api_format_.reverse_input_stream().num_channels()); |
+ msg->set_reverse_sample_rate( |
+ api_format_.reverse_input_stream().sample_rate_hz()); |
msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); |
+ // TODO(ekmeyerson): Add reverse output fields to event_msg_. |
int err = WriteMessageToDebugFile(); |
if (err != kNoError) { |