Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Unified Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2304123002: Cleaned up and revised the handling of resampling and bandsplitting in APM and (Closed)
Patch Set: Updated the test that required that no initialization should be done if APM is called with the defa… Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/audio_processing_impl.cc
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 222f749fb7b876fc55e53588a6d0914a192f90f1..bd311ad6b1615c75762f85061a8da5b7f73b03d2 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -82,15 +82,6 @@ const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
namespace {
-const int kInternalNativeRates[] = {AudioProcessing::kSampleRate8kHz,
- AudioProcessing::kSampleRate16kHz,
-#ifdef WEBRTC_ARCH_ARM_FAMILY
- AudioProcessing::kSampleRate32kHz};
-#else
- AudioProcessing::kSampleRate32kHz,
- AudioProcessing::kSampleRate48kHz};
-#endif // WEBRTC_ARCH_ARM_FAMILY
-
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
@@ -105,18 +96,33 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
return false;
}
-bool is_multi_band(int sample_rate_hz) {
+bool SampleRateSupportsMultiBand(int sample_rate_hz) {
return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
-int ClosestHigherNativeRate(int min_proc_rate) {
- for (int rate : kInternalNativeRates) {
- if (rate >= min_proc_rate) {
+int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
+#ifdef WEBRTC_ARCH_ARM_FAMILY
+ const int kMaxSplittingNativeProcessRate = AudioProcessing::kSampleRate32kHz;
+#else
+ const int kMaxSplittingNativeProcessRate = AudioProcessing::kSampleRate48kHz;
+#endif
+ RTC_DCHECK_LE(kMaxSplittingNativeProcessRate,
+ AudioProcessing::kMaxNativeSampleRateHz);
+ const int uppermost_native_rate = band_splitting_required
+ ? kMaxSplittingNativeProcessRate
+ : AudioProcessing::kSampleRate48kHz;
+
+ for (auto rate : AudioProcessing::kNativeSampleRatesHz) {
+ if (rate >= uppermost_native_rate) {
+ return uppermost_native_rate;
+ }
+ if (rate >= minimum_rate) {
return rate;
}
}
- return kInternalNativeRates[arraysize(kInternalNativeRates) - 1];
+ RTC_NOTREACHED();
+ return uppermost_native_rate;
}
} // namespace
@@ -124,6 +130,87 @@ int ClosestHigherNativeRate(int min_proc_rate) {
// Throughout webrtc, it's assumed that success is represented by zero.
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
+AudioProcessingImpl::ApmSubmoduleStates::ApmSubmoduleStates() {}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::Update(
+ bool high_pass_filter_enabled,
+ bool echo_canceller_enabled,
+ bool mobile_echo_controller_enabled,
+ bool noise_suppressor_enabled,
+ bool intelligibility_enhancer_enabled,
+ bool beamformer_enabled,
+ bool adaptive_gain_controller_enabled,
+ bool level_controller_enabled,
+ bool voice_activity_detector_enabled,
+ bool level_estimator_enabled,
+ bool transient_suppressor_enabled) {
+ bool changed = false;
+ changed |= (high_pass_filter_enabled != high_pass_filter_enabled_);
+ changed |= (echo_canceller_enabled != echo_canceller_enabled_);
+ changed |=
+ (mobile_echo_controller_enabled != mobile_echo_controller_enabled_);
+ changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
+ changed |=
+ (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
+ changed |= (beamformer_enabled != beamformer_enabled_);
+ changed |=
+ (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
+ changed |= (level_controller_enabled != level_controller_enabled_);
+ changed |= (level_estimator_enabled != level_estimator_enabled_);
+ changed |=
+ (voice_activity_detector_enabled != voice_activity_detector_enabled_);
+ changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
+ if (changed) {
+ high_pass_filter_enabled_ = high_pass_filter_enabled;
+ echo_canceller_enabled_ = echo_canceller_enabled;
+ mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
+ noise_suppressor_enabled_ = noise_suppressor_enabled;
+ intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
+ beamformer_enabled_ = beamformer_enabled;
+ adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
+ level_controller_enabled_ = level_controller_enabled;
+ level_estimator_enabled_ = level_estimator_enabled;
+ voice_activity_detector_enabled_ = voice_activity_detector_enabled;
+ transient_suppressor_enabled_ = transient_suppressor_enabled;
+ }
+
+ changed |= first_update_;
+ first_update_ = false;
+ return changed;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
+ const {
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+ return CaptureMultiBandProcessingActive() ||
+ intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_;
+#else
+ return CaptureMultiBandProcessingActive() || voice_activity_detector_enabled_;
+#endif
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
+ const {
+ return high_pass_filter_enabled_ || echo_canceller_enabled_ ||
+ mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
+ beamformer_enabled_ || adaptive_gain_controller_enabled_;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandSubModulesActive()
+ const {
+ return RenderMultiBandProcessingActive() || echo_canceller_enabled_ ||
+ mobile_echo_controller_enabled_ || adaptive_gain_controller_enabled_;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive()
+ const {
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+ return intelligibility_enhancer_enabled_;
+#else
+ return false;
+#endif
+}
+
struct AudioProcessingImpl::ApmPublicSubmodules {
ApmPublicSubmodules() {}
// Accessed externally of APM without any lock acquired.
@@ -275,12 +362,13 @@ int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
int AudioProcessingImpl::MaybeInitializeRender(
const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ return MaybeInitialize(processing_config, false);
}
int AudioProcessingImpl::MaybeInitializeCapture(
- const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
+ return MaybeInitialize(processing_config, force_initialization);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
@@ -300,9 +388,10 @@ AudioProcessingImpl::ApmDebugDumpState::~ApmDebugDumpState() {}
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_lock).
int AudioProcessingImpl::MaybeInitialize(
- const ProcessingConfig& processing_config) {
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
// Called from both threads. Thread check is therefore not possible.
- if (processing_config == formats_.api_format) {
+ if (processing_config == formats_.api_format && !force_initialization) {
return kNoError;
}
@@ -326,7 +415,8 @@ int AudioProcessingImpl::InitializeLocked() {
formats_.rev_proc_format.num_frames(),
formats_.rev_proc_format.num_channels(),
rev_audio_buffer_out_num_frames));
- if (rev_conversion_needed()) {
+ if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter = AudioConverter::Create(
formats_.api_format.reverse_input_stream().num_channels(),
formats_.api_format.reverse_input_stream().num_frames(),
@@ -397,17 +487,25 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
formats_.api_format = config;
- capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestHigherNativeRate(
+ int fwd_proc_rate = FindNativeProcessRateToUse(
std::min(formats_.api_format.input_stream().sample_rate_hz(),
- formats_.api_format.output_stream().sample_rate_hz())));
+ formats_.api_format.output_stream().sample_rate_hz()),
+ submodule_states_.CaptureMultiBandSubModulesActive() ||
+ submodule_states_.RenderMultiBandSubModulesActive());
- int rev_proc_rate = ClosestHigherNativeRate(std::min(
- formats_.api_format.reverse_input_stream().sample_rate_hz(),
- formats_.api_format.reverse_output_stream().sample_rate_hz()));
+ capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
+
+ int rev_proc_rate = FindNativeProcessRateToUse(
+ std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
+ formats_.api_format.reverse_output_stream().sample_rate_hz()),
+ submodule_states_.CaptureMultiBandSubModulesActive() ||
+ submodule_states_.RenderMultiBandSubModulesActive());
// TODO(aluebs): Remove this restriction once we figure out why the 3-band
// splitting filter degrades the AEC performance.
if (rev_proc_rate > kSampleRate32kHz) {
- rev_proc_rate = is_rev_processed() ? kSampleRate32kHz : kSampleRate16kHz;
+ rev_proc_rate = submodule_states_.RenderMultiBandProcessingActive()
+ ? kSampleRate32kHz
+ : kSampleRate16kHz;
}
// If the forward sample rate is 8 kHz, the reverse stream is also processed
// at this rate.
@@ -556,6 +654,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
float* const* dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
ProcessingConfig processing_config;
+ bool reinitialization_required = false;
{
// Acquire the capture lock in order to safely call the function
// that retrieves the render side data. This function accesses apm
@@ -570,6 +669,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
}
processing_config = formats_.api_format;
+ reinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream() = input_config;
@@ -578,7 +678,8 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
assert(processing_config.input_stream().num_frames() ==
@@ -646,6 +747,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
ProcessingConfig processing_config;
+ bool reinitialization_required = false;
{
// Aquire lock for the access of api_format.
// The lock is released immediately due to the conditional
@@ -654,6 +756,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
// TODO(ajm): The input and output rates and channels are currently
// constrained to be identical in the int16 interface.
processing_config = formats_.api_format;
+
+ reinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
processing_config.input_stream().set_num_channels(frame->num_channels_);
@@ -663,7 +767,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
if (frame->samples_per_channel_ !=
@@ -685,7 +790,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessStreamLocked());
- capture_.capture_audio->InterleaveTo(frame, output_copy_needed());
+ capture_.capture_audio->InterleaveTo(
+ frame, submodule_states_.CaptureMultiBandProcessingActive());
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_dump_.debug_file->is_open()) {
@@ -731,7 +837,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
capture_nonlocked_.fwd_proc_format.num_frames());
}
- if (fwd_analysis_needed()) {
+ if (submodule_states_.CaptureMultiBandSubModulesActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->SplitIntoFrequencyBands();
}
@@ -802,7 +910,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
ca, echo_cancellation()->stream_has_echo()));
- if (fwd_synthesis_needed()) {
+ if (submodule_states_.CaptureMultiBandProcessingActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->MergeFrequencyBands();
}
@@ -856,10 +966,11 @@ int AudioProcessingImpl::ProcessReverseStream(
rtc::CritScope cs(&crit_render_);
RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
reverse_output_config));
- if (is_rev_processed()) {
+ if (submodule_states_.RenderMultiBandProcessingActive()) {
render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
dest);
- } else if (render_check_rev_conversion_needed()) {
+ } else if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter->Convert(src, reverse_input_config.num_samples(),
dest,
reverse_output_config.num_samples());
@@ -961,15 +1072,15 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
#endif
render_.render_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessReverseStreamLocked());
- if (is_rev_processed()) {
- render_.render_audio->InterleaveTo(frame, true);
- }
+ render_.render_audio->InterleaveTo(
+ frame, submodule_states_.RenderMultiBandProcessingActive());
return kNoError;
}
int AudioProcessingImpl::ProcessReverseStreamLocked() {
AudioBuffer* ra = render_.render_audio.get(); // For brevity.
- if (rev_analysis_needed()) {
+ if (submodule_states_.RenderMultiBandSubModulesActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->SplitIntoFrequencyBands();
}
@@ -988,7 +1099,8 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
}
- if (rev_synthesis_needed()) {
+ if (submodule_states_.RenderMultiBandProcessingActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->MergeFrequencyBands();
}
@@ -1122,20 +1234,14 @@ int AudioProcessingImpl::StopDebugRecording() {
}
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_cancellation.get();
}
EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_control_mobile.get();
}
GainControl* AudioProcessingImpl::gain_control() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
if (constants_.use_experimental_agc) {
return public_submodules_->gain_control_for_experimental_agc.get();
}
@@ -1143,103 +1249,34 @@ GainControl* AudioProcessingImpl::gain_control() const {
}
HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->high_pass_filter.get();
}
LevelEstimator* AudioProcessingImpl::level_estimator() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->level_estimator.get();
}
NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->noise_suppression.get();
}
VoiceDetection* AudioProcessingImpl::voice_detection() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->voice_detection.get();
}
-bool AudioProcessingImpl::is_fwd_processed() const {
- // The beamformer, noise suppressor and highpass filter
- // modify the data.
- if (capture_nonlocked_.beamformer_enabled ||
- public_submodules_->high_pass_filter->is_enabled() ||
- public_submodules_->noise_suppression->is_enabled() ||
- public_submodules_->echo_cancellation->is_enabled() ||
- public_submodules_->echo_control_mobile->is_enabled() ||
- public_submodules_->gain_control->is_enabled()) {
- return true;
- }
-
- // The capture data is otherwise unchanged.
- return false;
-}
-
-bool AudioProcessingImpl::output_copy_needed() const {
- // Check if we've upmixed or downmixed the audio.
- return ((formats_.api_format.output_stream().num_channels() !=
- formats_.api_format.input_stream().num_channels()) ||
- is_fwd_processed() || capture_.transient_suppressor_enabled ||
- capture_nonlocked_.level_controller_enabled);
-}
-
-bool AudioProcessingImpl::fwd_synthesis_needed() const {
- return (is_fwd_processed() &&
- is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::fwd_analysis_needed() const {
- if (!is_fwd_processed() &&
- !public_submodules_->voice_detection->is_enabled() &&
- !capture_.transient_suppressor_enabled) {
- // Only public_submodules_->level_estimator is enabled.
- return false;
- } else if (is_multi_band(
- capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
- // Something besides public_submodules_->level_estimator is enabled, and we
- // have super-wb.
- return true;
- }
- return false;
-}
-
-bool AudioProcessingImpl::is_rev_processed() const {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- return capture_nonlocked_.intelligibility_enabled;
-#else
- return false;
-#endif
-}
-
-bool AudioProcessingImpl::rev_synthesis_needed() const {
- return (is_rev_processed() &&
- is_multi_band(formats_.rev_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::rev_analysis_needed() const {
- return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&
- (is_rev_processed() ||
- public_submodules_->echo_cancellation
- ->is_enabled_render_side_query() ||
- public_submodules_->echo_control_mobile
- ->is_enabled_render_side_query() ||
- public_submodules_->gain_control->is_enabled_render_side_query());
-}
-
-bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
- return rev_conversion_needed();
-}
-
-bool AudioProcessingImpl::rev_conversion_needed() const {
- return (formats_.api_format.reverse_input_stream() !=
- formats_.api_format.reverse_output_stream());
+bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
+ return submodule_states_.Update(
+ public_submodules_->high_pass_filter->is_enabled(),
+ public_submodules_->echo_cancellation->is_enabled(),
+ public_submodules_->echo_control_mobile->is_enabled(),
+ public_submodules_->noise_suppression->is_enabled(),
+ capture_nonlocked_.intelligibility_enabled,
+ capture_nonlocked_.beamformer_enabled,
+ public_submodules_->gain_control->is_enabled(),
+ capture_nonlocked_.level_controller_enabled,
+ public_submodules_->voice_detection->is_enabled(),
+ public_submodules_->level_estimator->is_enabled(),
+ capture_.transient_suppressor_enabled);
}
void AudioProcessingImpl::InitializeExperimentalAgc() {

Powered by Google App Engine
This is Rietveld 408576698