webrtc/modules/audio_processing/level_controller/level_controller.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Unified Diff: webrtc/modules/audio_processing/level_controller/level_controller.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Added reporting of metrics Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/level_controller/level_controller.h ('K') | « webrtc/modules/audio_processing/level_controller/level_controller.h ('k') | webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/level_controller/level_controller.cc

diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.cc b/webrtc/modules/audio_processing/level_controller/level_controller.cc

new file mode 100644

index 0000000000000000000000000000000000000000..d04daf2d4e8cce8248e0a1ff15d9db597ff9de5c

--- /dev/null

+++ b/webrtc/modules/audio_processing/level_controller/level_controller.cc

@@ -0,0 +1,233 @@

+/*

+ *

+ * Use of this source code is governed by a BSD-style license

+ * that can be found in the LICENSE file in the root of the source

+ * tree. An additional intellectual property rights grant can be found

+ * in the file PATENTS. All contributing project authors may

+ * be found in the AUTHORS file in the root of the source tree.

+ */

+#include "webrtc/modules/audio_processing/level_controller/level_controller.h"

+#include <math.h>

+#include <algorithm>

+#include <numeric>

+#include "webrtc/base/array_view.h"

+#include "webrtc/base/checks.h"

+#include "webrtc/modules/audio_processing/audio_buffer.h"

+#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"

+#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"

+#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"

+#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"

+#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"

+#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"

+#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

+#include "webrtc/system_wrappers/include/metrics.h"

+namespace webrtc {

+namespace {

+// Updates the running DC estimate of one channel and subtracts it from the
+// samples in place.
+//
+// forgetting_factor: weight of the current frame mean in the update of the
+//                    running estimate.
+// dc_level:          in/out running DC estimate for this channel.
+// x:                 the samples of the channel; must not be empty.
+void UpdateAndRemoveDcLevel(float forgetting_factor,
+                            float* dc_level,
+                            rtc::ArrayView<float> x) {
+  RTC_DCHECK(!x.empty());
+  // std::accumulate takes its accumulator type from the initial value, so the
+  // seed must be a float; an integer 0 would truncate every partial sum.
+  const float mean =
+      std::accumulate(x.begin(), x.end(), 0.f) / static_cast<float>(x.size());
+  *dc_level += forgetting_factor * (mean - *dc_level);
+
+  for (float& v : x) {
+    v -= *dc_level;
+  }
+}

+// Returns the largest per-channel energy of the frame, where the energy of a
+// channel is the sum of its squared samples.
+float FrameEnergy(const AudioBuffer& audio) {
+  float energy = 0.f;
+  for (size_t k = 0; k < audio.num_channels(); ++k) {
+    const float* channel = audio.channels_const_f()[k];
+    // The seed must be a float: std::accumulate stores the running sum in the
+    // type of the initial value, so an integer 0 would round the sum to int
+    // after every sample.
+    const float channel_energy =
+        std::accumulate(channel, channel + audio.num_frames(), 0.f,
+                        [](float a, float b) -> float { return a + b * b; });
+    energy = std::max(channel_energy, energy);
+  }
+  return energy;
+}

+// Returns the largest absolute sample value found in any channel of the
+// frame, or 0 when the frame holds no samples.
+float PeakLevel(const AudioBuffer& audio) {
+  float peak_level = 0.f;
+  for (size_t k = 0; k < audio.num_channels(); ++k) {
+    const float* channel = audio.channels_const_f()[k];
+    const float* channel_end = channel + audio.num_frames();
+    const float* channel_peak = std::max_element(
+        channel, channel_end,
+        [](float a, float b) { return std::abs(a) < std::abs(b); });
+    // For an empty range std::max_element returns the end pointer, which must
+    // not be dereferenced.
+    if (channel_peak == channel_end) {
+      continue;
+    }
+    // The comparator ranks samples by magnitude, but the element found keeps
+    // its sign; take the magnitude so that a negative peak is not discarded.
+    peak_level = std::max(std::abs(*channel_peak), peak_level);
+  }
+  return peak_level;
+}

+// Number of Metrics::Update() calls that are accumulated before the
+// histograms are reported and the statistics are reset.
+const int kMetricsFrameInterval = 1000;

+} // namespace

+// Running count of constructed LevelController objects. The constructor
+// passes the current value to ApmDataDumper and then increments it.
+// NOTE(review): the increment is a plain ++ with no synchronization visible
+// in this file - confirm that instances are never constructed concurrently.
+int LevelController::instance_count_ = 0;

+// Prepares the metrics for audio at |sample_rate_hz|: stores the number of
+// samples in one frame (1/100 of a second) and clears all accumulated
+// statistics. Only 8, 16, 32 and 48 kHz are accepted (checked in debug
+// builds).
+void LevelController::Metrics::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+
+  const int kFramesPerSecond = 100;
+  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond);
+
+  ResetEstimation();
+}

+// Clears the frame counter and all running sums and maxima so that a new
+// reporting interval starts from zero.
+void LevelController::Metrics::ResetEstimation() {

hlundin-webrtc 2016/06/29 08:56:28 Just call this method Reset.

peah-webrtc 2016/06/29 09:13:53 Done.

+ // Number of Update() calls seen in the current interval.
+ metrics_frame_counter_ = 0;

+ // Running sums, used for the reported averages.
+ gain_sum_ = 0.f;

+ peak_level_sum_ = 0.f;

+ noise_energy_sum_ = 0.f;

+ // Running maxima, used for the reported maximum values.
+ max_gain_ = 0.f;

+ max_peak_level_ = 0.f;

+ max_noise_energy_ = 0.f;

+// Accumulates the statistics of one frame and, on every
+// kMetricsFrameInterval-th call, reports the maximum and average noise power,
+// peak level and gain as histograms before resetting the statistics.
+//
+// peak_level:   peak level estimate for the frame.
+// noise_energy: noise energy estimate for the frame.
+// gain:         gain selected for the frame; the checks below expect the
+//               maximum and the average gain to be at least 1.
+void LevelController::Metrics::Update(float peak_level,

+ float noise_energy,

+ float gain) {

+ // Subtracted from each power in dB to express it relative to full scale.
+ // NOTE(review): 90.3090 is approximately 20 * log10(32768), so this
+ // presumably assumes samples on a 16-bit integer scale - confirm.
+ const float kdBFSOffset = 90.3090f;

+ gain_sum_ += gain;

+ peak_level_sum_ += peak_level;

+ noise_energy_sum_ += noise_energy;

+ max_gain_ = std::max(max_gain_, gain);

+ max_peak_level_ = std::max(max_peak_level_, peak_level);

+ max_noise_energy_ = std::max(max_noise_energy_, noise_energy);

+ ++metrics_frame_counter_;

+ if (metrics_frame_counter_ == kMetricsFrameInterval) {

+ // Maximum noise power: the frame energy is divided by the frame length to
+ // get a per-sample power. The 1e-10f term keeps the log10 argument away
+ // from zero.
+ RTC_HISTOGRAM_COUNTS(

+ "WebRTC.Audio.LevelControlMaxNoisePower",

hlundin-webrtc 2016/06/29 08:56:28 I think you should increase the readability of the

peah-webrtc 2016/06/29 09:13:53 Done.

+ static_cast<int>(10 *

hlundin-webrtc 2016/06/29 08:56:28 This was an awkward line wrap. Did clang format pr

peah-webrtc 2016/06/29 09:13:53 Yes, that was clang format. Sorry, I did not check

+ log10(max_noise_energy_ / frame_length_ + 1e-10f) -

+ kdBFSOffset),

+ -90, 0, 50);

+ // Average noise power: the summed energy is divided by the total number
+ // of samples in the interval.
+ RTC_HISTOGRAM_COUNTS(

+ "WebRTC.Audio.LevelControlAverageNoisePower",

+ static_cast<int>(10 *

+ log10(noise_energy_sum_ /

+ (frame_length_ * kMetricsFrameInterval) +

+ 1e-10f) -

+ kdBFSOffset),

+ -90, 0, 50);

+ // Maximum peak level: the level is squared so that it is reported as a
+ // power in dB.
+ RTC_HISTOGRAM_COUNTS(

+ "WebRTC.Audio.LevelControlMaxPeakLevel",

+ static_cast<int>(10 *

+ log10(max_peak_level_ * max_peak_level_ + 1e-10f) -

+ kdBFSOffset),

+ -90, 0, 50);

+ // Average peak level: the square of the mean level over the interval.
+ RTC_HISTOGRAM_COUNTS(

+ "WebRTC.Audio.LevelControlAveragePeakLevel",

+ static_cast<int>(

+ 10 * log10(peak_level_sum_ * peak_level_sum_ /

+ (kMetricsFrameInterval * kMetricsFrameInterval) +

+ 1e-10f) -

+ kdBFSOffset),

+ -90, 0, 50);

+ // With gains of at least 1 the reported dB values are non-negative, which
+ // matches the lower histogram bound of 0 used below.
+ RTC_DCHECK_LE(1.f, max_gain_);

+ RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);

+ // Gains are squared inside 10 * log10(), i.e. reported as 20 * log10(gain).
+ RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain",

+ static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0,

+ 33, 30);

+ RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain",

+ static_cast<int>(10 * log10(gain_sum_ * gain_sum_ /

+ (kMetricsFrameInterval *

+ kMetricsFrameInterval))),

+ 0, 33, 30);

+ // Start a new reporting interval.
+ ResetEstimation();

+ }

+// Creates the data dumper for this instance (identified by the current
+// instance count), hands it to the gain applier and the signal classifier,
+// and initializes the controller for 48 kHz audio.
+LevelController::LevelController()
+    : data_dumper_(new ApmDataDumper(instance_count_)),
+      gain_applier_(data_dumper_.get()),
+      signal_classifier_(data_dumper_.get()) {
+  Initialize(AudioProcessing::kSampleRate48kHz);
+
+  // Give the next instance a different identifier.
+  instance_count_ += 1;
+}

+// Nothing to release explicitly; the members clean up after themselves.
+LevelController::~LevelController() = default;

+// (Re)initializes the controller and all of its sub-modules for audio at
+// |sample_rate_hz|. Only 8, 16, 32 and 48 kHz are accepted (checked in debug
+// builds). All adaptive state is brought back to its starting point,
+// including the per-channel DC level estimates.
+void LevelController::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+  data_dumper_->InitiateNewSetOfRecordings();
+  gain_selector_.Initialize(sample_rate_hz);
+  gain_applier_.Initialize(sample_rate_hz);
+  signal_classifier_.Initialize(sample_rate_hz);
+  noise_level_estimator_.Initialize(sample_rate_hz);
+  peak_level_estimator_.Initialize();
+  saturating_gain_estimator_.Initialize();
+  metrics_.Initialize(sample_rate_hz);
+
+  last_gain_ = 1.0f;
+  sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
+  // The forgetting factor is 0.01 at 48 kHz and scales linearly with the rate.
+  dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
+
+  // Restart the DC tracking as well; otherwise the estimates used by
+  // Process() would carry over from before the re-initialization (or start
+  // from indeterminate values on first use).
+  // NOTE(review): assumes dc_level_ is an array or container of float, as its
+  // per-channel indexing in Process() suggests - confirm against the header.
+  for (float& level : dc_level_) {
+    level = 0.f;
+  }
+}

+// Runs the level controller on one frame of audio, in place.
+//
+// The frame must hold one or two channels, and Initialize() must have been
+// called (both checked in debug builds). The steps are: remove the DC level,
+// classify the signal, estimate noise energy and peak level, select a gain,
+// apply it, and finally update the saturation estimate and the metrics.
+// Intermediate values are handed to the data dumper along the way.
+void LevelController::Process(AudioBuffer* audio) {
+  RTC_DCHECK_LT(0u, audio->num_channels());
+  RTC_DCHECK_GE(2u, audio->num_channels());
+  RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
+  RTC_DCHECK(sample_rate_hz_);
+
+  data_dumper_->DumpWav("lc_input", audio->num_frames(),
+                        audio->channels_const_f()[0], *sample_rate_hz_, 1);
+
+  // Track and subtract the DC level of every channel.
+  for (size_t ch = 0; ch < audio->num_channels(); ++ch) {
+    rtc::ArrayView<float> samples(audio->channels_f()[ch],
+                                  audio->num_frames());
+    UpdateAndRemoveDcLevel(dc_forgetting_factor_, &dc_level_[ch], samples);
+  }
+
+  // Classify the frame and dump the result as an integer.
+  SignalClassifier::SignalType signal_type;
+  signal_classifier_.Analyze(*audio, &signal_type);
+  int signal_type_as_int = static_cast<int>(signal_type);
+  data_dumper_->DumpRaw("lc_signal_type", 1, &signal_type_as_int);
+
+  // Noise energy estimate, driven by the energy of the frame.
+  const float frame_energy = FrameEnergy(*audio);
+  float noise_energy =
+      noise_level_estimator_.Analyze(signal_type, frame_energy);
+
+  // Peak level estimate, driven by the peak of the frame.
+  const float frame_peak = PeakLevel(*audio);
+  float peak_level = peak_level_estimator_.Analyze(signal_type, frame_peak);
+
+  float saturating_gain = saturating_gain_estimator_.GetGain();
+
+  // Select the gain for this frame and apply it to the audio.
+  last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,
+                                         saturating_gain, signal_type);
+  int num_saturations = gain_applier_.Process(last_gain_, audio);
+
+  // Feed the outcome back into the saturating gain estimate.
+  saturating_gain_estimator_.Update(last_gain_, num_saturations);
+
+  // Accumulate the statistics for the periodic histogram reports.
+  metrics_.Update(peak_level, noise_energy, last_gain_);
+
+  data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
+  data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
+  data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level);
+  data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
+  data_dumper_->DumpWav("lc_output", audio->num_frames(),
+                        audio->channels_f()[0], *sample_rate_hz_, 1);
+}

+} // namespace webrtc