Chromium Code Reviews

Unified Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2304123002: Cleaned up and revised the handling of resampling and bandsplitting in APM and (Closed)
Patch Set: Typo correction Created 4 years, 3 months ago
Index: webrtc/modules/audio_processing/audio_processing_impl.cc
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 222f749fb7b876fc55e53588a6d0914a192f90f1..64e051af0f34a58477333ea5087ee6d92025ae19 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -82,15 +82,6 @@ const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
namespace {
-const int kInternalNativeRates[] = {AudioProcessing::kSampleRate8kHz,
- AudioProcessing::kSampleRate16kHz,
-#ifdef WEBRTC_ARCH_ARM_FAMILY
- AudioProcessing::kSampleRate32kHz};
-#else
- AudioProcessing::kSampleRate32kHz,
- AudioProcessing::kSampleRate48kHz};
-#endif // WEBRTC_ARCH_ARM_FAMILY
-
static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
@@ -105,18 +96,29 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
return false;
}
-bool is_multi_band(int sample_rate_hz) {
+bool SampleRateSupportsMultiBand(int sample_rate_hz) {
return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
-int ClosestHigherNativeRate(int min_proc_rate) {
- for (int rate : kInternalNativeRates) {
- if (rate >= min_proc_rate) {
+int NativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
+#ifdef WEBRTC_ARCH_ARM_FAMILY
+ const size_t kMaxSplittingNativeProcessRateIndex = 2;
+#else
+ const size_t kMaxSplittingNativeProcessRateIndex = 3;
+#endif
+ RTC_DCHECK_LT(kMaxSplittingNativeProcessRateIndex,
hlundin-webrtc 2016/09/06 10:41:32 You should be able to use a static_assert instead.
peah-webrtc 2016/09/07 06:28:11 Great suggestion but I ran into some problems. See
hlundin-webrtc 2016/09/07 21:36:05 Weird. I thought it'd work if you used constexpr i
peah-webrtc 2016/09/08 08:44:03 Acknowledged.
+ AudioProcessing::kNumNativeSampleRates);
+ size_t uppermost_native_rate_index =
+ (band_splitting_required ? kMaxSplittingNativeProcessRateIndex : 3);
+
+ for (size_t k = 0; k <= uppermost_native_rate_index; ++k) {
hlundin-webrtc 2016/09/06 10:41:32 Consider this alternative implementation of the fu
peah-webrtc 2016/09/07 06:28:11 This is a much nicer variant! Thanks!!! I could n
kwiberg-webrtc 2016/09/08 08:17:27 What problems did you encounter? It feels like thi
peah-webrtc 2016/09/08 08:44:03 There are not that many constexpr's to add. I trie
+ int rate = AudioProcessing::kNativeSampleRatesHz[k];
+ if (rate >= minimum_rate) {
return rate;
}
}
- return kInternalNativeRates[arraysize(kInternalNativeRates) - 1];
+ return AudioProcessing::kNativeSampleRatesHz[uppermost_native_rate_index];
}
} // namespace
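
[Editor's sketch] The comment thread above discusses replacing the RTC_DCHECK_LT with a compile-time check. A minimal sketch of that variant, assuming AudioProcessing::kNumNativeSampleRates is (or is made) constexpr, which the truncated replies suggest was the sticking point:

  #ifdef WEBRTC_ARCH_ARM_FAMILY
  constexpr size_t kMaxSplittingNativeProcessRateIndex = 2;
  #else
  constexpr size_t kMaxSplittingNativeProcessRateIndex = 3;
  #endif
  // Fails at compile time instead of at run time if the index is out of range.
  static_assert(kMaxSplittingNativeProcessRateIndex <
                    AudioProcessing::kNumNativeSampleRates,
                "Splitting rate index must address a native sample rate");
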
@@ -124,6 +126,83 @@ int ClosestHigherNativeRate(int min_proc_rate) {
// Throughout webrtc, it's assumed that success is represented by zero.
static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
+class AudioProcessingImpl::ApmSubmoduleStates {
+ public:
+ ApmSubmoduleStates(){};
+ // Updates the submodule state and returns true if it has changed.
+ bool Update(bool hpf_enabled,
+ bool aec_enabled,
+ bool aecm_enabled,
+ bool ns_enabled,
+ bool ie_enabled,
+ bool bf_enabled,
+ bool agc_enabled,
+ bool lc_enabled,
+ bool vad_enabled,
+ bool le_enabled,
+ bool ts_enabled) {
the sun 2016/09/02 20:11:34 What's "ts"? (I could look it up, but I pretend to
peah-webrtc 2016/09/07 06:28:11 ts is the transient suppressor. The abbreviation
+ bool changed = false;
+ changed |= (hpf_enabled != hpf_enabled_);
+ changed |= (aec_enabled != aec_enabled_);
+ changed |= (aecm_enabled != aecm_enabled_);
+ changed |= (ns_enabled != ns_enabled_);
+ changed |= (ie_enabled != ie_enabled_);
+ changed |= (bf_enabled != bf_enabled_);
+ changed |= (agc_enabled != agc_enabled_);
+ changed |= (lc_enabled != lc_enabled_);
+ changed |= (le_enabled != le_enabled_);
+ changed |= (vad_enabled != vad_enabled_);
+ changed |= (ts_enabled != ts_enabled_);
+ if (changed) {
+ hpf_enabled_ = hpf_enabled;
+ aec_enabled_ = aec_enabled;
+ aecm_enabled_ = aecm_enabled;
+ ns_enabled_ = ns_enabled;
+ ie_enabled_ = ie_enabled;
+ bf_enabled_ = bf_enabled;
+ agc_enabled_ = agc_enabled;
+ lc_enabled_ = lc_enabled;
+ le_enabled_ = le_enabled;
+ vad_enabled_ = vad_enabled;
+ ts_enabled_ = ts_enabled;
+ }
+
+ changed |= first_update_;
+ first_update_ = false;
+ return changed;
+ }
+
+ bool CaptureMultiBandModulesActive() const {
+ return CaptureMultiBandEffectsActive() || ie_enabled_ || vad_enabled_;
+ }
+
+ bool CaptureMultiBandEffectsActive() const {
+ return (hpf_enabled_ || aec_enabled_ || aecm_enabled_ || ns_enabled_ ||
hlundin-webrtc 2016/09/06 10:41:32 You can drop the () here too.
peah-webrtc 2016/09/07 06:28:11 Done.
+ bf_enabled_ || agc_enabled_);
+ }
+
+ bool RenderMultiBandModulesActive() const {
+ return RenderMultiBandEffectsActive() || aec_enabled_ || aecm_enabled_ ||
+ agc_enabled_;
+ }
+
+ bool RenderMultiBandEffectsActive() const { return ie_enabled_; }
+
+ private:
+ bool hpf_enabled_ = false;
+ bool aec_enabled_ = false;
+ bool aecm_enabled_ = false;
+ bool ns_enabled_ = false;
+ bool ie_enabled_ = false;
+ bool bf_enabled_ = false;
+ bool agc_enabled_ = false;
+ bool lc_enabled_ = false;
+ bool le_enabled_ = false;
+ bool vad_enabled_ = false;
+ bool ts_enabled_ = false;
+ bool first_update_ = true;
+};
+
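[Editor's sketch] An illustrative look at the Update() change-detection behaviour (not compilable in isolation, since ApmSubmoduleStates is a private nested class of AudioProcessingImpl):

  ApmSubmoduleStates states;
  // The very first call always reports a change (first_update_ is true),
  // so the caller performs an initial configuration pass.
  bool changed = states.Update(false, false, false, false, false, false,
                               false, false, false, false, false);  // -> true
  // Re-reporting identical flags is a no-op.
  changed = states.Update(false, false, false, false, false, false,
                          false, false, false, false, false);       // -> false
  // Flipping any single flag, e.g. enabling the high-pass filter, is detected.
  changed = states.Update(true, false, false, false, false, false,
                          false, false, false, false, false);       // -> true
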
struct AudioProcessingImpl::ApmPublicSubmodules {
ApmPublicSubmodules() {}
// Accessed externally of APM without any lock acquired.
@@ -178,7 +257,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
AudioProcessingImpl::AudioProcessingImpl(const Config& config,
NonlinearBeamformer* beamformer)
- : public_submodules_(new ApmPublicSubmodules()),
+ : submodule_states_(new ApmSubmoduleStates()),
+ public_submodules_(new ApmPublicSubmodules()),
private_submodules_(new ApmPrivateSubmodules(beamformer)),
constants_(config.Get<ExperimentalAgc>().startup_min_volume,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
@@ -275,12 +355,13 @@ int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
int AudioProcessingImpl::MaybeInitializeRender(
const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ return MaybeInitialize(processing_config, false);
}
int AudioProcessingImpl::MaybeInitializeCapture(
- const ProcessingConfig& processing_config) {
- return MaybeInitialize(processing_config);
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
+ return MaybeInitialize(processing_config, force_initialization);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
@@ -300,9 +381,10 @@ AudioProcessingImpl::ApmDebugDumpState::~ApmDebugDumpState() {}
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_lock).
int AudioProcessingImpl::MaybeInitialize(
- const ProcessingConfig& processing_config) {
+ const ProcessingConfig& processing_config,
+ bool force_initialization) {
// Called from both threads. Thread check is therefore not possible.
- if (processing_config == formats_.api_format) {
+ if (processing_config == formats_.api_format && !force_initialization) {
return kNoError;
}
@@ -326,7 +408,8 @@ int AudioProcessingImpl::InitializeLocked() {
formats_.rev_proc_format.num_frames(),
formats_.rev_proc_format.num_channels(),
rev_audio_buffer_out_num_frames));
- if (rev_conversion_needed()) {
+ if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter = AudioConverter::Create(
formats_.api_format.reverse_input_stream().num_channels(),
formats_.api_format.reverse_input_stream().num_frames(),
@@ -397,17 +480,25 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
formats_.api_format = config;
- capture_nonlocked_.fwd_proc_format = StreamConfig(ClosestHigherNativeRate(
+ int fwd_proc_rate = NativeProcessRateToUse(
std::min(formats_.api_format.input_stream().sample_rate_hz(),
- formats_.api_format.output_stream().sample_rate_hz())));
+ formats_.api_format.output_stream().sample_rate_hz()),
+ submodule_states_->CaptureMultiBandModulesActive() ||
+ submodule_states_->RenderMultiBandModulesActive());
+
+ capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
- int rev_proc_rate = ClosestHigherNativeRate(std::min(
- formats_.api_format.reverse_input_stream().sample_rate_hz(),
- formats_.api_format.reverse_output_stream().sample_rate_hz()));
+ int rev_proc_rate = NativeProcessRateToUse(
+ std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
+ formats_.api_format.reverse_output_stream().sample_rate_hz()),
+ submodule_states_->CaptureMultiBandModulesActive() ||
+ submodule_states_->RenderMultiBandModulesActive());
// TODO(aluebs): Remove this restriction once we figure out why the 3-band
// splitting filter degrades the AEC performance.
if (rev_proc_rate > kSampleRate32kHz) {
- rev_proc_rate = is_rev_processed() ? kSampleRate32kHz : kSampleRate16kHz;
+ rev_proc_rate = submodule_states_->RenderMultiBandEffectsActive()
+ ? kSampleRate32kHz
+ : kSampleRate16kHz;
}
// If the forward sample rate is 8 kHz, the reverse stream is also processed
// at this rate.
@@ -556,6 +647,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
float* const* dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
ProcessingConfig processing_config;
+ bool reeinitialization_required;
hlundin-webrtc 2016/09/06 10:41:32 Typo: reeinit... -> reinit...
peah-webrtc 2016/09/07 06:28:11 Done.
{
// Acquire the capture lock in order to safely call the function
// that retrieves the render side data. This function accesses apm
@@ -570,6 +662,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
}
processing_config = formats_.api_format;
+ reeinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream() = input_config;
@@ -578,7 +671,8 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reeinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
assert(processing_config.input_stream().num_frames() ==
@@ -646,6 +740,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
ProcessingConfig processing_config;
+ bool reeinitialization_required;
hlundin-webrtc 2016/09/06 10:41:32 Same typo.
peah-webrtc 2016/09/07 06:28:11 Done.
{
// Acquire lock for the access of api_format.
// The lock is released immediately due to the conditional
@@ -654,6 +749,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
// TODO(ajm): The input and output rates and channels are currently
// constrained to be identical in the int16 interface.
processing_config = formats_.api_format;
+
+ reeinitialization_required = UpdateActiveSubmoduleStates();
}
processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
processing_config.input_stream().set_num_channels(frame->num_channels_);
@@ -663,7 +760,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
{
// Do conditional reinitialization.
rtc::CritScope cs_render(&crit_render_);
- RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
+ RETURN_ON_ERR(
+ MaybeInitializeCapture(processing_config, reeinitialization_required));
}
rtc::CritScope cs_capture(&crit_capture_);
if (frame->samples_per_channel_ !=
@@ -685,7 +783,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessStreamLocked());
- capture_.capture_audio->InterleaveTo(frame, output_copy_needed());
+ capture_.capture_audio->InterleaveTo(frame, true);
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_dump_.debug_file->is_open()) {
@@ -731,7 +829,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
capture_nonlocked_.fwd_proc_format.num_frames());
}
- if (fwd_analysis_needed()) {
+ if (submodule_states_->CaptureMultiBandModulesActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->SplitIntoFrequencyBands();
}
@@ -802,7 +902,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(
ca, echo_cancellation()->stream_has_echo()));
- if (fwd_synthesis_needed()) {
+ if (submodule_states_->CaptureMultiBandEffectsActive() &&
+ SampleRateSupportsMultiBand(
+ capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
ca->MergeFrequencyBands();
}
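
[Editor's sketch] The two conditions in this hunk are deliberately asymmetric: splitting is gated on CaptureMultiBandModulesActive(), merging on CaptureMultiBandEffectsActive(). A restated sketch (free functions for illustration only, reusing the predicate names introduced in this patch):

  // Split whenever any multi-band module needs to read the band data,
  // including analysis-only modules (intelligibility analysis, VAD).
  bool CaptureSplitNeeded(const ApmSubmoduleStates& states, int rate_hz) {
    return states.CaptureMultiBandModulesActive() &&
           SampleRateSupportsMultiBand(rate_hz);
  }
  // Merge back only when a module may have modified the band data; pure
  // analysis modules leave the full-band signal valid, so no merge is needed.
  bool CaptureMergeNeeded(const ApmSubmoduleStates& states, int rate_hz) {
    return states.CaptureMultiBandEffectsActive() &&
           SampleRateSupportsMultiBand(rate_hz);
  }
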
@@ -856,10 +958,11 @@ int AudioProcessingImpl::ProcessReverseStream(
rtc::CritScope cs(&crit_render_);
RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
reverse_output_config));
- if (is_rev_processed()) {
+ if (submodule_states_->RenderMultiBandEffectsActive()) {
render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
dest);
- } else if (render_check_rev_conversion_needed()) {
+ } else if (formats_.api_format.reverse_input_stream() !=
+ formats_.api_format.reverse_output_stream()) {
render_.render_converter->Convert(src, reverse_input_config.num_samples(),
dest,
reverse_output_config.num_samples());
@@ -961,15 +1064,14 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
#endif
render_.render_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessReverseStreamLocked());
- if (is_rev_processed()) {
- render_.render_audio->InterleaveTo(frame, true);
- }
+ render_.render_audio->InterleaveTo(frame, true);
return kNoError;
}
int AudioProcessingImpl::ProcessReverseStreamLocked() {
AudioBuffer* ra = render_.render_audio.get(); // For brevity.
- if (rev_analysis_needed()) {
+ if (submodule_states_->RenderMultiBandModulesActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->SplitIntoFrequencyBands();
}
@@ -988,7 +1090,8 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
}
- if (rev_synthesis_needed()) {
+ if (submodule_states_->RenderMultiBandEffectsActive() &&
+ SampleRateSupportsMultiBand(formats_.rev_proc_format.sample_rate_hz())) {
ra->MergeFrequencyBands();
}
@@ -1122,20 +1225,14 @@ int AudioProcessingImpl::StopDebugRecording() {
}
EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_cancellation.get();
}
EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->echo_control_mobile.get();
}
GainControl* AudioProcessingImpl::gain_control() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
if (constants_.use_experimental_agc) {
return public_submodules_->gain_control_for_experimental_agc.get();
}
@@ -1143,103 +1240,34 @@ GainControl* AudioProcessingImpl::gain_control() const {
}
HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->high_pass_filter.get();
}
LevelEstimator* AudioProcessingImpl::level_estimator() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->level_estimator.get();
}
NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->noise_suppression.get();
}
VoiceDetection* AudioProcessingImpl::voice_detection() const {
- // Adding a lock here has no effect as it allows any access to the submodule
- // from the returned pointer.
return public_submodules_->voice_detection.get();
}
-bool AudioProcessingImpl::is_fwd_processed() const {
- // The beamformer, noise suppressor and highpass filter
- // modify the data.
- if (capture_nonlocked_.beamformer_enabled ||
- public_submodules_->high_pass_filter->is_enabled() ||
- public_submodules_->noise_suppression->is_enabled() ||
- public_submodules_->echo_cancellation->is_enabled() ||
- public_submodules_->echo_control_mobile->is_enabled() ||
- public_submodules_->gain_control->is_enabled()) {
- return true;
- }
-
- // The capture data is otherwise unchanged.
- return false;
-}
-
-bool AudioProcessingImpl::output_copy_needed() const {
- // Check if we've upmixed or downmixed the audio.
- return ((formats_.api_format.output_stream().num_channels() !=
- formats_.api_format.input_stream().num_channels()) ||
- is_fwd_processed() || capture_.transient_suppressor_enabled ||
- capture_nonlocked_.level_controller_enabled);
-}
-
-bool AudioProcessingImpl::fwd_synthesis_needed() const {
- return (is_fwd_processed() &&
- is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::fwd_analysis_needed() const {
- if (!is_fwd_processed() &&
- !public_submodules_->voice_detection->is_enabled() &&
- !capture_.transient_suppressor_enabled) {
- // Only public_submodules_->level_estimator is enabled.
- return false;
- } else if (is_multi_band(
- capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {
- // Something besides public_submodules_->level_estimator is enabled, and we
- // have super-wb.
- return true;
- }
- return false;
-}
-
-bool AudioProcessingImpl::is_rev_processed() const {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- return capture_nonlocked_.intelligibility_enabled;
-#else
- return false;
-#endif
-}
-
-bool AudioProcessingImpl::rev_synthesis_needed() const {
- return (is_rev_processed() &&
- is_multi_band(formats_.rev_proc_format.sample_rate_hz()));
-}
-
-bool AudioProcessingImpl::rev_analysis_needed() const {
- return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&
- (is_rev_processed() ||
- public_submodules_->echo_cancellation
- ->is_enabled_render_side_query() ||
- public_submodules_->echo_control_mobile
- ->is_enabled_render_side_query() ||
- public_submodules_->gain_control->is_enabled_render_side_query());
-}
-
-bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
- return rev_conversion_needed();
-}
-
-bool AudioProcessingImpl::rev_conversion_needed() const {
- return (formats_.api_format.reverse_input_stream() !=
- formats_.api_format.reverse_output_stream());
+bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
+ return submodule_states_->Update(
+ public_submodules_->high_pass_filter->is_enabled(),
+ public_submodules_->echo_cancellation->is_enabled(),
+ public_submodules_->echo_control_mobile->is_enabled(),
+ public_submodules_->noise_suppression->is_enabled(),
+ capture_nonlocked_.intelligibility_enabled,
+ capture_nonlocked_.beamformer_enabled,
+ public_submodules_->gain_control->is_enabled(),
+ capture_nonlocked_.level_controller_enabled,
+ public_submodules_->voice_detection->is_enabled(),
+ public_submodules_->level_estimator->is_enabled(),
+ capture_.transient_suppressor_enabled);
}
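
[Editor's sketch] Taken together, the pieces of this patch form the following simplified (not verbatim) capture-side flow:

  // ProcessStream():
  //   reinitialization_required = UpdateActiveSubmoduleStates();
  //   MaybeInitializeCapture(processing_config, reinitialization_required);
  //     MaybeInitialize(config, force):
  //       re-initializes if the api format changed OR force is set
  //     InitializeLocked(config):
  //       NativeProcessRateToUse(min requested rate, any multi-band module active)
  //       -> picks the lowest native rate (8/16/32/48 kHz) >= the requested
  //          rate; on WEBRTC_ARCH_ARM_FAMILY, band splitting caps it at 32 kHz.
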
void AudioProcessingImpl::InitializeExperimentalAgc() {
