Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Unified Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fix Mac Error (3) Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/audio_processing_impl.cc
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 81d6c70be9131a4a3e263f6738e95369acdd3327..c9e4ddc10c5f2aa7a330dc283ea5cd114161075f 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -15,8 +15,9 @@
#include "webrtc/base/checks.h"
#include "webrtc/base/platform_file.h"
-#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/audio_converter.h"
#include "webrtc/common_audio/channel_buffer.h"
+#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
@@ -29,6 +30,7 @@ extern "C" {
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/processing_component.h"
@@ -184,6 +186,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
#endif
api_format_({{{kSampleRate16kHz, 1, false},
{kSampleRate16kHz, 1, false},
+ {kSampleRate16kHz, 1, false},
{kSampleRate16kHz, 1, false}}}),
fwd_proc_format_(kSampleRate16kHz),
rev_proc_format_(kSampleRate16kHz, 1),
@@ -210,7 +213,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
#endif
beamformer_enabled_(config.Get<Beamforming>().enabled),
beamformer_(beamformer),
- array_geometry_(config.Get<Beamforming>().array_geometry) {
+ array_geometry_(config.Get<Beamforming>().array_geometry),
+ intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@@ -282,11 +286,17 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
ChannelLayout output_layout,
ChannelLayout reverse_layout) {
const ProcessingConfig processing_config = {
- {{input_sample_rate_hz, ChannelsFromLayout(input_layout),
+ {{input_sample_rate_hz,
+ ChannelsFromLayout(input_layout),
LayoutHasKeyboard(input_layout)},
- {output_sample_rate_hz, ChannelsFromLayout(output_layout),
+ {output_sample_rate_hz,
+ ChannelsFromLayout(output_layout),
LayoutHasKeyboard(output_layout)},
- {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout),
+ {reverse_sample_rate_hz,
+ ChannelsFromLayout(reverse_layout),
+ LayoutHasKeyboard(reverse_layout)},
+ {reverse_sample_rate_hz,
+ ChannelsFromLayout(reverse_layout),
LayoutHasKeyboard(reverse_layout)}}};
return Initialize(processing_config);
@@ -301,14 +311,28 @@ int AudioProcessingImpl::InitializeLocked() {
const int fwd_audio_buffer_channels =
beamformer_enabled_ ? api_format_.input_stream().num_channels()
: api_format_.output_stream().num_channels();
- if (api_format_.reverse_stream().num_channels() > 0) {
+ const int rev_audio_buffer_out_num_frames =
+ api_format_.reverse_output_stream().num_frames() == 0
+ ? rev_proc_format_.num_frames()
+ : api_format_.reverse_output_stream().num_frames();
+ if (api_format_.reverse_input_stream().num_channels() > 0) {
render_audio_.reset(new AudioBuffer(
- api_format_.reverse_stream().num_frames(),
- api_format_.reverse_stream().num_channels(),
+ api_format_.reverse_input_stream().num_frames(),
+ api_format_.reverse_input_stream().num_channels(),
rev_proc_format_.num_frames(), rev_proc_format_.num_channels(),
- rev_proc_format_.num_frames()));
+ rev_audio_buffer_out_num_frames));
+ if (rev_conversion_needed()) {
+ render_converter_ = AudioConverter::Create(
+ api_format_.reverse_input_stream().num_channels(),
+ api_format_.reverse_input_stream().num_frames(),
+ api_format_.reverse_output_stream().num_channels(),
+ api_format_.reverse_output_stream().num_frames());
+ } else {
+ render_converter_.reset(nullptr);
+ }
} else {
render_audio_.reset(nullptr);
+ render_converter_.reset(nullptr);
}
capture_audio_.reset(new AudioBuffer(
api_format_.input_stream().num_frames(),
@@ -329,6 +353,8 @@ int AudioProcessingImpl::InitializeLocked() {
InitializeBeamformer();
+ InitializeIntelligibility();
+
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
int err = WriteInitMessage();
@@ -396,7 +422,8 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
// ...the forward stream is at 8 kHz.
rev_proc_rate = kSampleRate8kHz;
} else {
- if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) {
+ if (api_format_.reverse_input_stream().sample_rate_hz() ==
+ kSampleRate32kHz) {
// ...or the input is at 32 kHz, in which case we use the splitting
// filter rather than the resampler.
rev_proc_rate = kSampleRate32kHz;
@@ -624,6 +651,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
MaybeUpdateHistograms();
AudioBuffer* ca = capture_audio_.get(); // For brevity.
+
if (use_new_agc_ && gain_control_->is_enabled()) {
agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
fwd_proc_format_.num_frames());
@@ -634,6 +662,11 @@ int AudioProcessingImpl::ProcessStreamLocked() {
ca->SplitIntoFrequencyBands();
}
+ if (intelligibility_enabled_) {
+ intelligibility_enhancer_->AnalyzeCaptureAudio(
+ ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
+ }
+
if (beamformer_enabled_) {
beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
ca->set_num_channels(1);
@@ -684,50 +717,81 @@ int AudioProcessingImpl::ProcessStreamLocked() {
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
- int sample_rate_hz,
+ int rev_sample_rate_hz,  // Used as the first field of the StreamConfig built below.
ChannelLayout layout) {
const StreamConfig reverse_config = {
- sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
+ rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
};
if (samples_per_channel != reverse_config.num_frames()) {
return kBadDataLengthError;
}
- return AnalyzeReverseStream(data, reverse_config);
+ return AnalyzeReverseStream(data, reverse_config, reverse_config);  // Same config serves as both reverse input and reverse output.
+}
+
+int AudioProcessingImpl::ProcessReverseStream(  // Analyzes |src| as the reverse stream, then fills |dest| via one of three paths below.
+ const float* const* src,
+ const StreamConfig& reverse_input_config,
+ const StreamConfig& reverse_output_config,
+ float* const* dest) {
+ RETURN_ON_ERR(
+ AnalyzeReverseStream(src, reverse_input_config, reverse_output_config));  // NOTE(review): this call takes crit_ internally; nothing in this function holds it for the accesses below - confirm that is intended.
+ if (is_rev_processed()) {  // Path 1: output comes from render_audio_, in the stored reverse output format.
+ render_audio_->CopyTo(api_format_.reverse_output_stream(), dest);
+ } else if (rev_conversion_needed()) {  // Path 2: |src| goes straight through render_converter_.
+ render_converter_->Convert(src, reverse_input_config.num_samples(), dest,
+ reverse_output_config.num_samples());
+ } else {  // Path 3: |src| is handed to CopyAudioIfNeeded using the input frame and channel counts.
+ CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
+ reverse_input_config.num_channels(), dest);
+ }
+
+ return kNoError;  // Reached only when the analysis step above did not return early.
}
int AudioProcessingImpl::AnalyzeReverseStream(
- const float* const* data,
- const StreamConfig& reverse_config) {
+ const float* const* src,  // Indexed per channel when writing the debug dump below.
+ const StreamConfig& reverse_input_config,  // Format of |src|.
+ const StreamConfig& reverse_output_config) {  // Stored into the processing config; not otherwise read in this function.
CriticalSectionScoped crit_scoped(crit_);
- if (data == NULL) {
+ if (src == NULL) {
return kNullPointerError;
}
- if (reverse_config.num_channels() <= 0) {
+ if (reverse_input_config.num_channels() <= 0) {  // Only the input config's channel count is validated here.
return kBadNumberChannelsError;
}
ProcessingConfig processing_config = api_format_;
- processing_config.reverse_stream() = reverse_config;
+ processing_config.reverse_input_stream() = reverse_input_config;  // Start from the current api_format_ and override both reverse entries.
+ processing_config.reverse_output_stream() = reverse_output_config;
RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
- assert(reverse_config.num_frames() ==
- api_format_.reverse_stream().num_frames());
+ assert(reverse_input_config.num_frames() ==  // After the call above, api_format_ is expected to carry the requested input frame count.
+ api_format_.reverse_input_stream().num_frames());
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
const size_t channel_size =
- sizeof(float) * api_format_.reverse_stream().num_frames();
- for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
- msg->add_channel(data[i], channel_size);
+ sizeof(float) * api_format_.reverse_input_stream().num_frames();  // Bytes per channel of float input.
+ for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i)
+ msg->add_channel(src[i], channel_size);  // One debug-dump entry per input channel.
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
- render_audio_->CopyFrom(data, api_format_.reverse_stream());
- return AnalyzeReverseStreamLocked();
+ render_audio_->CopyFrom(src, api_format_.reverse_input_stream());  // Load |src| into render_audio_ using the reverse input format.
+ return ProcessReverseStreamLocked();
+}
+
+int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {  // Analyzes |frame|; writes render_audio_ back into it only when is_rev_processed() is true.
+ RETURN_ON_ERR(AnalyzeReverseStream(frame));
+ if (is_rev_processed()) {  // Otherwise |frame| is left as the caller supplied it by this function.
+ render_audio_->InterleaveTo(frame, true);
+ }
+
+ return kNoError;
}
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
@@ -752,12 +816,18 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
}
ProcessingConfig processing_config = api_format_;
- processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_);
- processing_config.reverse_stream().set_num_channels(frame->num_channels_);
+ processing_config.reverse_input_stream().set_sample_rate_hz(
+ frame->sample_rate_hz_);
+ processing_config.reverse_input_stream().set_num_channels(
+ frame->num_channels_);
+ processing_config.reverse_output_stream().set_sample_rate_hz(
+ frame->sample_rate_hz_);
+ processing_config.reverse_output_stream().set_num_channels(
+ frame->num_channels_);
RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
if (frame->samples_per_channel_ !=
- api_format_.reverse_stream().num_frames()) {
+ api_format_.reverse_input_stream().num_frames()) {
return kBadDataLengthError;
}
@@ -771,23 +841,32 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
-
render_audio_->DeinterleaveFrom(frame);
- return AnalyzeReverseStreamLocked();
+ return ProcessReverseStreamLocked();
}
-int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
+int AudioProcessingImpl::ProcessReverseStreamLocked() {  // Runs the render-side components on render_audio_; returns kNoError unless a component call returns early.
AudioBuffer* ra = render_audio_.get(); // For brevity.
if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
ra->SplitIntoFrequencyBands();
}
+ if (intelligibility_enabled_) {  // Enhancer is given the kBand0To8kHz split channels, split_rate_, and the channel count.
+ intelligibility_enhancer_->ProcessRenderAudio(
+ ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
+ }
+
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
if (!use_new_agc_) {
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
}
+ if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz &&  // Same rate condition as the split above, plus is_rev_processed().
+ is_rev_processed()) {
+ ra->MergeFrequencyBands();
+ }
+
return kNoError;
}
@@ -1004,6 +1083,15 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
return false;
}
+bool AudioProcessingImpl::is_rev_processed() const {  // True when intelligibility is enabled and the enhancer reports active().
+ return intelligibility_enabled_ && intelligibility_enhancer_->active();  // Short-circuit: the enhancer is not dereferenced when the feature is disabled.
+}
+
+bool AudioProcessingImpl::rev_conversion_needed() const {  // True when the stored reverse input and reverse output stream configs compare unequal.
+ return (api_format_.reverse_input_stream() !=
+ api_format_.reverse_output_stream());
+}
+
void AudioProcessingImpl::InitializeExperimentalAgc() {
if (use_new_agc_) {
if (!agc_manager_.get()) {
@@ -1036,6 +1124,16 @@ void AudioProcessingImpl::InitializeBeamformer() {
}
}
+void AudioProcessingImpl::InitializeIntelligibility() {  // Rebuilds intelligibility_enhancer_ from the current buffers; no-op when the feature is disabled.
+ if (intelligibility_enabled_) {
+ IntelligibilityEnhancer::Config config;
+ config.sample_rate_hz = split_rate_;
+ config.num_capture_channels = capture_audio_->num_channels();
+ config.num_render_channels = render_audio_->num_channels();  // NOTE(review): InitializeLocked() resets render_audio_ to nullptr when the reverse input has no channels - confirm this dereference cannot be reached in that case.
+ intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));  // Replaces any previously held enhancer.
+ }
+}
+
void AudioProcessingImpl::MaybeUpdateHistograms() {
static const int kMinDiffDelayMs = 60;
@@ -1134,9 +1232,12 @@ int AudioProcessingImpl::WriteInitMessage() {
msg->set_sample_rate(api_format_.input_stream().sample_rate_hz());
msg->set_num_input_channels(api_format_.input_stream().num_channels());
msg->set_num_output_channels(api_format_.output_stream().num_channels());
- msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels());
- msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz());
+ msg->set_num_reverse_channels(
+ api_format_.reverse_input_stream().num_channels());
+ msg->set_reverse_sample_rate(
+ api_format_.reverse_input_stream().sample_rate_hz());
msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz());
+ // TODO(ekmeyerson): Add reverse output fields to event_msg_.
int err = WriteMessageToDebugFile();
if (err != kNoError) {
« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.h ('k') | webrtc/modules/audio_processing/include/audio_processing.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698