Chromium Code Reviews

Unified Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2304123002: Cleaned up and revised the handling of resampling and bandsplitting in APM and (Closed)
Patch Set: Typo correction Created 4 years, 3 months ago
Index: webrtc/modules/audio_processing/audio_processing_impl.cc
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 222f749fb7b876fc55e53588a6d0914a192f90f1..64e051af0f34a58477333ea5087ee6d92025ae19 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -82,15 +82,6 @@ const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
namespace {
-const int kInternalNativeRates[] = {AudioProcessing::kSampleRate8kHz,
- AudioProcessing::kSampleRate16kHz,
-#ifdef WEBRTC_ARCH_ARM_FAMILY
- AudioProcessing::kSampleRate32kHz};
-#else
- AudioProcessing::kSampleRate32kHz,
- AudioProcessing::kSampleRate48kHz};
-#endif // WEBRTC_ARCH_ARM_FAMILY
-
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
@@ -105,18 +96,29 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
return false;
}
-bool is_multi_band(int sample_rate_hz) {
+bool SampleRateSupportsMultiBand(int sample_rate_hz) {
return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
-int ClosestHigherNativeRate(int min_proc_rate) {
- for (int rate : kInternalNativeRates) {
- if (rate >= min_proc_rate) {
+int NativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
+#ifdef WEBRTC_ARCH_ARM_FAMILY
+ const size_t kMaxSplittingNativeProcessRateIndex = 2;
+#else
+ const size_t kMaxSplittingNativeProcessRateIndex = 3;
+#endif
+ RTC_DCHECK_LT(kMaxSplittingNativeProcessRateIndex,
hlundin-webrtc 2016/09/06 10:41:32 You should be able to use a static_assert instead.
peah-webrtc 2016/09/07 06:28:11 Great suggestion but I ran into some problems. See
hlundin-webrtc 2016/09/07 21:36:05 Weird. I thought it'd work if you used constexpr i
peah-webrtc 2016/09/08 08:44:03 Acknowledged.
+ AudioProcessing::kNumNativeSampleRates);
+ size_t uppermost_native_rate_index =
+ (band_splitting_required ? kMaxSplittingNativeProcessRateIndex : 3);
+
+ for (size_t k = 0; k <= uppermost_native_rate_index; ++k) {
hlundin-webrtc 2016/09/06 10:41:32 Consider this alternative implementation of the fu
peah-webrtc 2016/09/07 06:28:11 This is a much nicer variant! Thanks!!! I could n
kwiberg-webrtc 2016/09/08 08:17:27 What problems did you encounter? It feels like thi
peah-webrtc 2016/09/08 08:44:03 There are not that many constexpr's to add. I trie
+ int rate = AudioProcessing::kNativeSampleRatesHz[k];
+ if (rate >= minimum_rate) {
return rate;
}
}
- return kInternalNativeRates[arraysize(kInternalNativeRates) - 1];
+ return AudioProcessing::kNativeSampleRatesHz[uppermost_native_rate_index];
}
} // namespace
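
[Editor's sketch] The comment thread above discusses replacing the RTC_DCHECK_LT with a compile-time check. A minimal sketch of that variant, assuming AudioProcessing::kNumNativeSampleRates is (or is made) constexpr, which the truncated replies suggest was the sticking point:

  #ifdef WEBRTC_ARCH_ARM_FAMILY
  constexpr size_t kMaxSplittingNativeProcessRateIndex = 2;
  #else
  constexpr size_t kMaxSplittingNativeProcessRateIndex = 3;
  #endif
  // Fails at compile time instead of at run time if the index is out of range.
  static_assert(kMaxSplittingNativeProcessRateIndex <
                    AudioProcessing::kNumNativeSampleRates,
                "Splitting rate index must address a native sample rate");
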
@@ -124,6 +126,83 @@ int ClosestHigherNativeRate(int min_proc_rate) {
// Throughout webrtc, it's assumed that success is represented by zero.
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
+class AudioProcessingImpl::ApmSubmoduleStates {
+ public:
+ ApmSubmoduleStates(){};
+ // Updates the submodule state and returns true if it has changed.
+ bool Update(bool hpf_enabled,
+ bool aec_enabled,
+ bool aecm_enabled,
+ bool ns_enabled,
+ bool ie_enabled,
+ bool bf_enabled,
+ bool agc_enabled,
+ bool lc_enabled,
+ bool vad_enabled,
+ bool le_enabled,
+ bool ts_enabled) {
the sun 2016/09/02 20:11:34 What's "ts"? (I could look it up, but I pretend to
peah-webrtc 2016/09/07 06:28:11 ts is the transient suppressor. The abbreviation
+ bool changed = false;
+ changed |= (hpf_enabled != hpf_enabled_);
+ changed |= (aec_enabled != aec_enabled_);
+ changed |= (aecm_enabled != aecm_enabled_);
+ changed |= (ns_enabled != ns_enabled_);
+ changed |= (ie_enabled != ie_enabled_);
+ changed |= (bf_enabled != bf_enabled_);
+ changed |= (agc_enabled != agc_enabled_);
+ changed |= (lc_enabled != lc_enabled_);
+ changed |= (le_enabled != le_enabled_);
+ changed |= (vad_enabled != vad_enabled_);
+ changed |= (ts_enabled != ts_enabled_);
+ if (changed) {
+ hpf_enabled_ = hpf_enabled;
+ aec_enabled_ = aec_enabled;
+ aecm_enabled_ = aecm_enabled;
+ ns_enabled_ = ns_enabled;
+ ie_enabled_ = ie_enabled;
+ bf_enabled_ = bf_enabled;
+ agc_enabled_ = agc_enabled;
+ lc_enabled_ = lc_enabled;
+ le_enabled_ = le_enabled;
+ vad_enabled_ = vad_enabled;
+ ts_enabled_ = ts_enabled;
+ }
+
+ changed |= first_update_;
+ first_update_ = false;
+ return changed;
+ }
+
+ bool CaptureMultiBandModulesActive() const {
+ return CaptureMultiBandEffectsActive() || ie_enabled_ || vad_enabled_;
+ }
+
+ bool CaptureMultiBandEffectsActive() const {
+ return (hpf_enabled_ || aec_enabled_ || aecm_enabled_ || ns_enabled_ ||
hlundin-webrtc 2016/09/06 10:41:32 You can drop the () here too.
peah-webrtc 2016/09/07 06:28:11 Done.
+ bf_enabled_ || agc_enabled_);
+ }
+
+ bool RenderMultiBandModulesActive() const {
+ return RenderMultiBandEffectsActive() || aec_enabled_ || aecm_enabled_ ||
+ agc_enabled_;
+ }
+
+ bool RenderMultiBandEffectsActive() const { return ie_enabled_; }
+
+ private:
+ bool hpf_enabled_ = false;
+ bool aec_enabled_ = false;
+ bool aecm_enabled_ = false;
+ bool ns_enabled_ = false;
+ bool ie_enabled_ = false;
+ bool bf_enabled_ = false;
+ bool agc_enabled_ = false;
+ bool lc_enabled_ = false;
+ bool le_enabled_ = false;
+ bool vad_enabled_ = false;
+ bool ts_enabled_ = false;
+ bool first_update_ = true;
+};
+
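[Editor's sketch] An illustrative look at the Update() change-detection behaviour (not compilable in isolation, since ApmSubmoduleStates is a private nested class of AudioProcessingImpl):

  ApmSubmoduleStates states;
  // The very first call always reports a change (first_update_ is true),
  // so the caller performs an initial configuration pass.
  bool changed = states.Update(false, false, false, false, false, false,
                               false, false, false, false, false);  // -> true
  // Re-reporting identical flags is a no-op.
  changed = states.Update(false, false, false, false, false, false,
                          false, false, false, false, false);       // -> false
  // Flipping any single flag, e.g. enabling the high-pass filter, is detected.
  changed = states.Update(true, false, false, false, false, false,
                          false, false, false, false, false);       // -> true
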
struct AudioProcessingImpl::ApmPublicSubmodules {
ApmPublicSubmodules() {}
// Accessed externally of APM without any lock acquired.
@@ -178,7 +257,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
AudioProcessingImpl::AudioProcessingImpl(const Config& config,
NonlinearBeamformer* beamformer)
- : public_submodules_(new ApmPublicSubmodules()),
+ : submodule_states_(new ApmSubmoduleStates()),
+ public_submodules_(new ApmPublicSubmodules()),
private_submodules_(new ApmPrivateSubmodules(beamformer)),
constants_(config.Get<ExperimentalAgc>().startup_min_volume,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
@@ -275,12 +355,13 @@ int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
int AudioProcessingImpl::MaybeInitializeRender(
const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ return MaybeInitialize(processing_config, false);
}
int AudioProcessingImpl::MaybeInitializeCapture(
- const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
+ return MaybeInitialize(processing_config, force_initialization);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
@@ -300,9 +381,10 @@ AudioProcessingImpl::ApmDebugDumpState::~ApmDebugDumpState() {}
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_lock).
int AudioProcessingImpl::MaybeInitialize(
- const ProcessingConfig& processing_config) {
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
// Called from both threads. Thread check is therefore not possible.
- if (processing_config == formats_.api_format) {
+ if (processing_config == formats_.api_format && !force_initialization) {
return kNoError;
}
@@ -326,7 +408,8 @@ int AudioProcessingImpl::InitializeLocked() {
formats_.rev_proc_format.num_frames(),
formats_.rev_proc_format.num_channels(),
rev_audio_buffer_out_num_frames));
- if (rev_conversion_needed()) {
+ if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter = AudioConverter::Create(
formats_.api_format.reverse_input_stream().num_channels(),
formats_.api_format.reverse_input_stream().num_frames(),
@@ -397,17 +480,25 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
formats_.api_format = config;
- capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestHigherNativeRate(
+ int fwd_proc_rate = NativeProcessRateToUse(
std::min(formats_.api_format.input_stream().sample_rate_hz(),
- formats_.api_format.output_stream().sample_rate_hz())));
+ formats_.api_format.output_stream().sample_rate_hz()),
+ submodule_states_->CaptureMultiBandModulesActive() ||
+ submodule_states_->RenderMultiBandModulesActive());
+
+ capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
- int rev_proc_rate = ClosestHigherNativeRate(std::min(
- formats_.api_format.reverse_input_stream().sample_rate_hz(),
- formats_.api_format.reverse_output_stream().sample_rate_hz()));
+ int rev_proc_rate = NativeProcessRateToUse(
+ std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
+ formats_.api_format.reverse_output_stream().sample_rate_hz()),
+ submodule_states_->CaptureMultiBandModulesActive() ||
+ submodule_states_->RenderMultiBandModulesActive());
// TODO(aluebs): Remove this restriction once we figure out why the 3-band
// splitting filter degrades the AEC performance.
if (rev_proc_rate > kSampleRate32kHz) {
- rev_proc_rate = is_rev_processed() ? kSampleRate32kHz : kSampleRate16kHz;
+ rev_proc_rate = submodule_states_->RenderMultiBandEffectsActive()
+ ? kSampleRate32kHz
+ : kSampleRate16kHz;
}
// If the forward sample rate is 8 kHz, the reverse stream is also processed
// at this rate.
@@ -556,6 +647,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
float* const* dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
ProcessingConfig processing_config;
+ bool reeinitialization_required;
hlundin-webrtc 2016/09/06 10:41:32 Typo: reeinit... -> reinit...
peah-webrtc 2016/09/07 06:28:11 Done.
{
// Acquire the capture lock in order to safely call the function
// that retrieves the render side data. This function accesses apm
@@ -570,6 +662,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
}
processing_config = formats_.api_format;
+ reeinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream() = input_config;
@@ -578,7 +671,8 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reeinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
assert(processing_config.input_stream().num_frames() ==
@@ -646,6 +740,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
ProcessingConfig processing_config;
+ bool reeinitialization_required;
hlundin-webrtc 2016/09/06 10:41:32 Same typo.
peah-webrtc 2016/09/07 06:28:11 Done.
{
// Acquire lock for the access of api_format.
// The lock is released immediately due to the conditional
@@ -654,6 +749,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
// TODO(ajm): The input and output rates and channels are currently
// constrained to be identical in the int16 interface.
processing_config = formats_.api_format;
+
+ reeinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
processing_config.input_stream().set_num_channels(frame->num_channels_);
@@ -663,7 +760,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reeinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
if (frame->samples_per_channel_ !=
@@ -685,7 +783,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessStreamLocked());
- capture_.capture_audio->InterleaveTo(frame, output_copy_needed());
+ capture_.capture_audio->InterleaveTo(frame, true);
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_dump_.debug_file->is_open()) {
@@ -731,7 +829,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
capture_nonlocked_.fwd_proc_format.num_frames());
}
- if (fwd_analysis_needed()) {
+ if (submodule_states_->CaptureMultiBandModulesActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->SplitIntoFrequencyBands();
}
@@ -802,7 +902,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
ca, echo_cancellation()->stream_has_echo()));
- if (fwd_synthesis_needed()) {
+ if (submodule_states_->CaptureMultiBandEffectsActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->MergeFrequencyBands();
}
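
[Editor's sketch] The two conditions in this hunk are deliberately asymmetric: splitting is gated on CaptureMultiBandModulesActive(), merging on CaptureMultiBandEffectsActive(). A restated sketch (free functions for illustration only, reusing the predicate names introduced in this patch):

  // Split whenever any multi-band module needs to read the band data,
  // including analysis-only modules (intelligibility analysis, VAD).
  bool CaptureSplitNeeded(const ApmSubmoduleStates& states, int rate_hz) {
    return states.CaptureMultiBandModulesActive() &&
           SampleRateSupportsMultiBand(rate_hz);
  }
  // Merge back only when a module may have modified the band data; pure
  // analysis modules leave the full-band signal valid, so no merge is needed.
  bool CaptureMergeNeeded(const ApmSubmoduleStates& states, int rate_hz) {
    return states.CaptureMultiBandEffectsActive() &&
           SampleRateSupportsMultiBand(rate_hz);
  }
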
@@ -856,10 +958,11 @@ int AudioProcessingImpl::ProcessReverseStream(
rtc::CritScope cs(&crit_render_);
RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
reverse_output_config));
- if (is_rev_processed()) {
+ if (submodule_states_->RenderMultiBandEffectsActive()) {
render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
dest);
- } else if (render_check_rev_conversion_needed()) {
+ } else if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter->Convert(src, reverse_input_config.num_samples(),
dest,
reverse_output_config.num_samples());
@@ -961,15 +1064,14 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
#endif
render_.render_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessReverseStreamLocked());
- if (is_rev_processed()) {
- render_.render_audio->InterleaveTo(frame, true);
- }
+ render_.render_audio->InterleaveTo(frame, true);
return kNoError;
}
int AudioProcessingImpl::ProcessReverseStreamLocked() {
AudioBuffer* ra = render_.render_audio.get(); // For brevity.
- if (rev_analysis_needed()) {
+ if (submodule_states_->RenderMultiBandModulesActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->SplitIntoFrequencyBands();
}
@@ -988,7 +1090,8 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
}
- if (rev_synthesis_needed()) {
+ if (submodule_states_->RenderMultiBandEffectsActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->MergeFrequencyBands();
}
@@ -1122,20 +1225,14 @@ int AudioProcessingImpl::StopDebugRecording() {
}
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_cancellation.get();
}
EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_control_mobile.get();
}
GainControl* AudioProcessingImpl::gain_control() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
if (constants_.use_experimental_agc) {
return public_submodules_->gain_control_for_experimental_agc.get();
}
@@ -1143,103 +1240,34 @@ GainControl* AudioProcessingImpl::gain_control() const {
}
HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->high_pass_filter.get();
}
LevelEstimator* AudioProcessingImpl::level_estimator() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->level_estimator.get();
}
NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->noise_suppression.get();
}
VoiceDetection* AudioProcessingImpl::voice_detection() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->voice_detection.get();
}
-bool AudioProcessingImpl::is_fwd_processed() const {
- // The beamformer, noise suppressor and highpass filter
- // modify the data.
- if (capture_nonlocked_.beamformer_enabled ||
- public_submodules_->high_pass_filter->is_enabled() ||
- public_submodules_->noise_suppression->is_enabled() ||
- public_submodules_->echo_cancellation->is_enabled() ||
- public_submodules_->echo_control_mobile->is_enabled() ||
- public_submodules_->gain_control->is_enabled()) {
- return true;
- }
-
- // The capture data is otherwise unchanged.
- return false;
-}
-
-bool AudioProcessingImpl::output_copy_needed() const {
- // Check if we've upmixed or downmixed the audio.
- return ((formats_.api_format.output_stream().num_channels() !=
- formats_.api_format.input_stream().num_channels()) ||
- is_fwd_processed() || capture_.transient_suppressor_enabled ||
- capture_nonlocked_.level_controller_enabled);
-}
-
-bool AudioProcessingImpl::fwd_synthesis_needed() const {
- return (is_fwd_processed() &&
- is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::fwd_analysis_needed() const {
- if (!is_fwd_processed() &&
- !public_submodules_->voice_detection->is_enabled() &&
- !capture_.transient_suppressor_enabled) {
- // Only public_submodules_->level_estimator is enabled.
- return false;
- } else if (is_multi_band(
- capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
- // Something besides public_submodules_->level_estimator is enabled, and we
- // have super-wb.
- return true;
- }
- return false;
-}
-
-bool AudioProcessingImpl::is_rev_processed() const {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- return capture_nonlocked_.intelligibility_enabled;
-#else
- return false;
-#endif
-}
-
-bool AudioProcessingImpl::rev_synthesis_needed() const {
- return (is_rev_processed() &&
- is_multi_band(formats_.rev_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::rev_analysis_needed() const {
- return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&
- (is_rev_processed() ||
- public_submodules_->echo_cancellation
- ->is_enabled_render_side_query() ||
- public_submodules_->echo_control_mobile
- ->is_enabled_render_side_query() ||
- public_submodules_->gain_control->is_enabled_render_side_query());
-}
-
-bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
- return rev_conversion_needed();
-}
-
-bool AudioProcessingImpl::rev_conversion_needed() const {
- return (formats_.api_format.reverse_input_stream() !=
- formats_.api_format.reverse_output_stream());
+bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
+ return submodule_states_->Update(
+ public_submodules_->high_pass_filter->is_enabled(),
+ public_submodules_->echo_cancellation->is_enabled(),
+ public_submodules_->echo_control_mobile->is_enabled(),
+ public_submodules_->noise_suppression->is_enabled(),
+ capture_nonlocked_.intelligibility_enabled,
+ capture_nonlocked_.beamformer_enabled,
+ public_submodules_->gain_control->is_enabled(),
+ capture_nonlocked_.level_controller_enabled,
+ public_submodules_->voice_detection->is_enabled(),
+ public_submodules_->level_estimator->is_enabled(),
+ capture_.transient_suppressor_enabled);
}
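
[Editor's sketch] Taken together, the pieces of this patch form the following simplified (not verbatim) capture-side flow:

  // ProcessStream():
  //   reinitialization_required = UpdateActiveSubmoduleStates();
  //   MaybeInitializeCapture(processing_config, reinitialization_required);
  //     MaybeInitialize(config, force):
  //       re-initializes if the api format changed OR force is set
  //     InitializeLocked(config):
  //       NativeProcessRateToUse(min requested rate, any multi-band module active)
  //       -> picks the lowest native rate (8/16/32/48 kHz) >= the requested
  //          rate; on WEBRTC_ARCH_ARM_FAMILY, band splitting caps it at 32 kHz.
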
void AudioProcessingImpl::InitializeExperimentalAgc() {
