webrtc/modules/audio_processing/level_controller/signal_classifier.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Unified Diff: webrtc/modules/audio_processing/level_controller/signal_classifier.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Corrected the initial behavior for the peak level estimate, and ensured a nonzero minimum peak leve… Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/level_controller/signal_classifier.h ('K') | « webrtc/modules/audio_processing/level_controller/signal_classifier.h ('k') | webrtc/modules/audio_processing/logging/apm_data_dumper.h » ('j') | webrtc/modules/audio_processing/test/process_test.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/level_controller/signal_classifier.cc

diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc

new file mode 100644

index 0000000000000000000000000000000000000000..d09220365807c4a07c563160ae2f24000823b45f

--- /dev/null

+++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc

@@ -0,0 +1,157 @@

+/*

+ *

+ * Use of this source code is governed by a BSD-style license

+ * that can be found in the LICENSE file in the root of the source

+ * tree. An additional intellectual property rights grant can be found

+ * in the file PATENTS. All contributing project authors may

+ * be found in the AUTHORS file in the root of the source tree.

+ */

+#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"

+#include <algorithm>

+#include <numeric>

+#include <vector>

+#include "webrtc/base/array_view.h"

+#include "webrtc/base/constructormagic.h"

+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"

+#include "webrtc/modules/audio_processing/audio_buffer.h"

+#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"

+#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"

+#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

+namespace webrtc {

+namespace {

+// Removes the DC component of |x| in place: the arithmetic mean of all samples

+// is computed and subtracted from every sample.

+//

+// x: samples to modify; must contain at least one element (checked in debug

+//    builds, since the mean is obtained by dividing by x.size()).

+void RemoveDcLevel(rtc::ArrayView<float> x) {

+  RTC_DCHECK_LT(0u, x.size());

+  float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f);

+  mean /= x.size();

+  for (float& v : x) {

+    v -= mean;

+  }

+}

+// Computes a 65-bin power spectrum from a 128-sample real-valued frame.

+//

+// x:        input frame; must hold exactly 128 samples. It is not modified.

+// spectrum: output; must hold exactly 65 elements. Every element is

+//           overwritten, so the caller does not need to initialize it.

+void PowerSpectrum(rtc::ArrayView<const float> x,

+                   rtc::ArrayView<float> spectrum) {

+  RTC_DCHECK_EQ(65u, spectrum.size());

+  RTC_DCHECK_EQ(128u, x.size());

+  // The transform overwrites its argument, so operate on a local copy.

+  float X[128];

+  std::copy(x.data(), x.data() + x.size(), X);

+  aec_rdft_forward_128(X);

+  // The transform output is read as: X[0] and X[1] hold the two single-valued

+  // bins (stored into spectrum[0] and spectrum[64]), and X[2k], X[2k+1] hold

+  // the two components of bin k. NOTE(review): this looks like the packed

+  // real-FFT layout of the Ooura routines - confirm against aec_rdft.h.

+  // The debug-only checks below document where X_p points at each step.

+  float* X_p = X;

+  RTC_DCHECK_EQ(X_p, &X[0]);

hlundin-webrtc 2016/06/27 11:21:17 Use DCHECK here and in the places below.

peah-webrtc 2016/06/27 22:51:50 Done.

+  spectrum[0] = (*X_p) * (*X_p);

+  ++X_p;

+  RTC_DCHECK_EQ(X_p, &X[1]);

+  spectrum[64] = (*X_p) * (*X_p);

+  for (int k = 1; k < 64; ++k) {

+    ++X_p;

+    RTC_DCHECK_EQ(X_p, &X[2 * k]);

+    // Assign (not accumulate) the first term: spectrum[k] is uninitialized

+    // on entry.

+    spectrum[k] = (*X_p) * (*X_p);

hlundin-webrtc 2016/06/27 11:21:17 You are adding to an undefined value here. Use = i

peah-webrtc 2016/06/27 22:51:50 Great find!!!!! Done.

+    ++X_p;

+    RTC_DCHECK_EQ(X_p, &X[2 * k + 1]);

+    spectrum[k] += (*X_p) * (*X_p);

+  }

+}

+// Classifies the stationarity of the current frame by comparing its power

+// spectrum with the noise spectrum estimate, band by band, over bands 1..39.

+//

+// A band counts as stationary when the signal power is below 3 times the

+// noise power, and as highly nonstationary when it is above 9 times the noise

+// power. More than 20 stationary bands yields kStationary; otherwise more

+// than 15 highly nonstationary bands yields kHighlyNonStationary; otherwise

+// the result is kNonStationary.

+//

+// signal_spectrum: power spectrum of the frame; at least 40 elements.

+// noise_spectrum:  noise power estimate; at least 40 elements.

+// data_dumper:     receives both band counts; must not be null.

+// signal_type:     output, receives the classification; must not be null.

+void ClassifySignal(rtc::ArrayView<const float> signal_spectrum,

+                    rtc::ArrayView<const float> noise_spectrum,

+                    ApmDataDumper* data_dumper,

+                    SignalClassifier::SignalType* signal_type) {

hlundin-webrtc 2016/06/27 11:21:17 Return the signal type instead of having it as an

peah-webrtc 2016/06/27 22:51:50 Done.

+  RTC_DCHECK(data_dumper);

+  RTC_DCHECK(signal_type);

+  RTC_DCHECK_LE(40u, signal_spectrum.size());

+  RTC_DCHECK_LE(40u, noise_spectrum.size());

+  int num_stationary_bands = 0;

+  int num_highly_nonstationary_bands = 0;

+  // Detect stationary and highly nonstationary bands.

+  for (size_t k = 1; k < 40; k++) {

hlundin-webrtc 2016/06/27 11:21:17 size_t

peah-webrtc 2016/06/27 22:51:50 Done.

+    if (signal_spectrum[k] < 3 * noise_spectrum[k]) {

+      ++num_stationary_bands;

+    } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) {

+      ++num_highly_nonstationary_bands;

+    }

+  }

+  // Use the detected number of bands to classify the overall signal

+  // stationarity.

+  if (num_stationary_bands > 20) {

+    *signal_type = SignalClassifier::SignalType::kStationary;

+  } else if (num_highly_nonstationary_bands > 15) {

+    *signal_type = SignalClassifier::SignalType::kHighlyNonStationary;

+  } else {

+    *signal_type = SignalClassifier::SignalType::kNonStationary;

+  }

+  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands);

+  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,

+                       &num_highly_nonstationary_bands);

+}

+} // namespace

+// Builds an extended frame by prepending the tail of the previously produced

+// extended frame to each new input frame. The amount of history kept is

+// extended_frame_size - frame_size samples and starts out as zeros.

+class FrameExtender {

hlundin-webrtc 2016/06/27 11:21:17 Why is this outside of the unnamed namespace?

peah-webrtc 2016/06/27 22:51:50 I think it needs to be outside of that for the rea

hlundin-webrtc 2016/06/28 11:29:01 Oh, I see. Then I think I would prefer to have it

peah-webrtc 2016/06/28 22:19:37 Done.

+ public:

+  FrameExtender(size_t frame_size, size_t extended_frame_size)

+      : x_old_(extended_frame_size - frame_size, 0.f) {}

+  // Writes the stored history followed by |x| into |x_extended|, then saves

+  // the last x_old_.size() samples of |x_extended| as the new history.

+  // |x_extended| must be exactly x_old_.size() + x.size() samples long.

+  void ExtendFrame(rtc::ArrayView<const float> x,

+                   rtc::ArrayView<float> x_extended) {

+    const size_t history_length = x_old_.size();

+    RTC_DCHECK_EQ(history_length + x.size(), x_extended.size());

+    float* const extended_begin = x_extended.data();

+    float* const extended_end = extended_begin + x_extended.size();

+    // Old samples first, new samples right after them.

+    std::copy(x_old_.begin(), x_old_.end(), extended_begin);

+    std::copy(x.data(), x.data() + x.size(), extended_begin + history_length);

+    // Remember the tail of the extended frame for the next call.

+    std::copy(extended_end - history_length, extended_end, x_old_.begin());

+  }

+ private:

+  std::vector<float> x_old_;

+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender);

+};

+// Stores |data_dumper| (not owned by this constructor's code; it is only

+// copied into data_dumper_) and sets up all internal state through

+// Initialize() using AudioProcessing::kSampleRate48kHz as the sample rate.

+SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)

+    : data_dumper_(data_dumper) {

+  Initialize(AudioProcessing::kSampleRate48kHz);

+}

+// Defined out of line, in this file, where FrameExtender is a complete type.

+SignalClassifier::~SignalClassifier() = default;

+// (Re)initializes the classifier for |sample_rate_hz|.

+//

+// Calls aec_rdft_init(), replaces the down sampler, the frame extender

+// (80-sample frames extended to 128 samples) and the noise spectrum estimator

+// with fresh instances, records the sample rate, and restarts the

+// initialization period, which lasts 2 frames.

+void SignalClassifier::Initialize(int sample_rate_hz) {

+  aec_rdft_init();

+  down_sampler_.reset(new DownSampler(data_dumper_, sample_rate_hz));

+  frame_extender_.reset(new FrameExtender(80, 128));

+  noise_spectrum_estimator_.reset(new NoiseSpectrumEstimator(data_dumper_));

+  sample_rate_hz_ = sample_rate_hz;

+  initialization_frames_left_ = 2;

+}

+// Classifies the stationarity of one frame of audio and updates the noise

+// spectrum estimate.

+//

+// audio:       frame to analyze; only the first channel is read.

+// signal_type: output, receives the classification; must not be null.

+void SignalClassifier::Analyze(const AudioBuffer& audio,

+                               SignalType* signal_type) {

+  RTC_DCHECK(signal_type);

+  // NOTE(review): assumes 10 ms frames, which the fixed 80-sample downsampled

+  // buffer below suggests - confirm against DownSampler.

+  RTC_DCHECK_EQ(static_cast<size_t>(sample_rate_hz_ / 100),

+                audio.num_frames());

+  // Compute the signal power spectrum.

hlundin-webrtc 2016/06/27 11:21:17 You may want to DCHECK some properties of the Audi

peah-webrtc 2016/06/27 22:51:50 That would make sense, but I think AudioBuffer doe

hlundin-webrtc 2016/06/28 11:29:01 Can't you just check that the number of samples in

peah-webrtc 2016/06/28 22:19:37 True! Good point! Done.

+  float downsampled_frame[80];

+  down_sampler_->DownSample(

+      rtc::ArrayView<const float>(audio.channels_const_f()[0],

+                                  audio.num_frames()),

+      downsampled_frame);

+  // Prepend history so that the frame has the 128 samples PowerSpectrum

+  // requires, then remove the mean before transforming.

+  float extended_frame[128];

+  frame_extender_->ExtendFrame(downsampled_frame, extended_frame);

+  RemoveDcLevel(extended_frame);

+  float signal_spectrum[65];

+  PowerSpectrum(extended_frame, signal_spectrum);

+  // Classify the signal based on the estimate of the noise spectrum and the

+  // signal spectrum estimate.

+  ClassifySignal(signal_spectrum, noise_spectrum_estimator_->GetNoiseSpectrum(),

+                 data_dumper_, signal_type);

+  // Update the noise spectrum based on the signal spectrum.

+  noise_spectrum_estimator_->Update(signal_spectrum,

+                                    initialization_frames_left_ > 0);

+  // Update the number of frames until a reliable signal spectrum is achieved.

+  initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);

+}

+} // namespace webrtc