webrtc/modules/audio_processing/level_controller/level_controller.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Side by Side Diff: webrtc/modules/audio_processing/level_controller/level_controller.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Temporarily deactivated the level controller until the CL with the proper tuning has been landed Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/level_controller/level_controller.h ('k') | webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc » ('j') | webrtc/webrtc_tests.gypi » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

	12

	13 #include <math.h>

	14 #include <algorithm>

	15 #include <numeric>

	16

	17 #include "webrtc/base/array_view.h"

	18 #include "webrtc/base/arraysize.h"

	19 #include "webrtc/base/checks.h"

	20 #include "webrtc/modules/audio_processing/audio_buffer.h"

	21 #include "webrtc/modules/audio_processing/level_controller/gain_applier.h"

	22 #include "webrtc/modules/audio_processing/level_controller/gain_selector.h"

	23 #include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"

	24 #include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"

	25 #include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"

	26 #include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"

	27 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

	28 #include "webrtc/system_wrappers/include/metrics.h"

	29

	30 namespace webrtc {

	31 namespace {

	32

	33 void UpdateAndRemoveDcLevel(float forgetting_factor,

	34 float* dc_level,

	35 rtc::ArrayView<float> x) {

	36 RTC_DCHECK(!x.empty());

	37 float mean =

	38 std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size());

	39 dc_level += forgetting_factor (mean - *dc_level);

	40

	41 for (float& v : x) {

	42 v -= *dc_level;

	43 }

	44 }

	45

	46 float FrameEnergy(const AudioBuffer& audio) {

	47 float energy = 0.f;

	48 for (size_t k = 0; k < audio.num_channels(); ++k) {

	49 float channel_energy =

	50 std::accumulate(audio.channels_const_f()[k],

	51 audio.channels_const_f()[k] + audio.num_frames(), 0,

	52 [](float a, float b) -> float { return a + b * b; });

	53 energy = std::max(channel_energy, energy);

	54 }

	55 return energy;

	56 }

	57

	58 float PeakLevel(const AudioBuffer& audio) {

	59 float peak_level = 0.f;

	60 for (size_t k = 0; k < audio.num_channels(); ++k) {

	61 auto channel_peak_level = std::max_element(

	62 audio.channels_const_f()[k],

	63 audio.channels_const_f()[k] + audio.num_frames(),

	64 [](float a, float b) { return std::abs(a) < std::abs(b); });

	65 peak_level = std::max(*channel_peak_level, peak_level);

	66 }

	67 return peak_level;

	68 }

	69

	70 const int kMetricsFrameInterval = 1000;

	71

	72 } // namespace

	73

	74 int LevelController::instance_count_ = 0;

	75

	76 void LevelController::Metrics::Initialize(int sample_rate_hz) {

	77 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|

	78 sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|

	79 sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|

	80 sample_rate_hz == AudioProcessing::kSampleRate48kHz);

	81

	82 Reset();

	83 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);

	84 }

	85

	86 void LevelController::Metrics::Reset() {

	87 metrics_frame_counter_ = 0;

	88 gain_sum_ = 0.f;

	89 peak_level_sum_ = 0.f;

	90 noise_energy_sum_ = 0.f;

	91 max_gain_ = 0.f;

	92 max_peak_level_ = 0.f;

	93 max_noise_energy_ = 0.f;

	94 }

	95

	96 void LevelController::Metrics::Update(float peak_level,

	97 float noise_energy,

	98 float gain) {

	99 const float kdBFSOffset = 90.3090f;

	100 gain_sum_ += gain;

	101 peak_level_sum_ += peak_level;

	102 noise_energy_sum_ += noise_energy;

	103 max_gain_ = std::max(max_gain_, gain);

	104 max_peak_level_ = std::max(max_peak_level_, peak_level);

	105 max_noise_energy_ = std::max(max_noise_energy_, noise_energy);

	106

	107 ++metrics_frame_counter_;

	108 if (metrics_frame_counter_ == kMetricsFrameInterval) {

	109 RTC_HISTOGRAM_COUNTS(

	110 "WebRTC.Audio.LevelControl.MaxNoisePower",

	111 static_cast<int>(10 * log10(max_noise_energy_ / frame_length_ + 1e-10f)

	112 - kdBFSOffset),

	113 -90, 0, 50);

	114 RTC_HISTOGRAM_COUNTS(

	115 "WebRTC.Audio.LevelControl.AverageNoisePower",

	116 static_cast<int>(10 * log10(noise_energy_sum_ /

	117 (frame_length_ * kMetricsFrameInterval) +

	118 1e-10f) - kdBFSOffset),

	119 -90, 0, 50);

	120

	121 RTC_HISTOGRAM_COUNTS(

	122 "WebRTC.Audio.LevelControl.MaxPeakLevel",

	123 static_cast<int>(10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f)

	124 - kdBFSOffset),

	125 -90, 0, 50);

	126 RTC_HISTOGRAM_COUNTS(

	127 "WebRTC.Audio.LevelControl.AveragePeakLevel",

	128 static_cast<int>(10 * log10(peak_level_sum_ * peak_level_sum_ /

	129 (kMetricsFrameInterval *

	130 kMetricsFrameInterval) +

	131 1e-10f) - kdBFSOffset),

	132 -90, 0, 50);

	133

	134 RTC_DCHECK_LE(1.f, max_gain_);

	135 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);

	136 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain",

	137 static_cast<int>(10 * log10(max_gain_ * max_gain_)),

	138 0, 33, 30);

	139 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",

	140 static_cast<int>(10 * log10(gain_sum_ * gain_sum_ /

	141 (kMetricsFrameInterval *

	142 kMetricsFrameInterval))),

	143 0, 33, 30);

	144 Reset();

	145 }

	146 }

	147

	148 LevelController::LevelController()

	149 : data_dumper_(new ApmDataDumper(instance_count_)),

	150 gain_applier_(data_dumper_.get()),

	151 signal_classifier_(data_dumper_.get()) {

	152 Initialize(AudioProcessing::kSampleRate48kHz);

	153 ++instance_count_;

	154 }

	155

	156 LevelController::~LevelController() {}

	157

	158 void LevelController::Initialize(int sample_rate_hz) {

	159 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|

	160 sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|

	161 sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|

	162 sample_rate_hz == AudioProcessing::kSampleRate48kHz);

	163 data_dumper_->InitiateNewSetOfRecordings();

	164 gain_selector_.Initialize(sample_rate_hz);

	165 gain_applier_.Initialize(sample_rate_hz);

	166 signal_classifier_.Initialize(sample_rate_hz);

	167 noise_level_estimator_.Initialize(sample_rate_hz);

	168 peak_level_estimator_.Initialize();

	169 saturating_gain_estimator_.Initialize();

	170 metrics_.Initialize(sample_rate_hz);

	171

	172 last_gain_ = 1.0f;

	173 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);

	174 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;

	175 std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f);

	176 }

	177

	178 void LevelController::Process(AudioBuffer* audio) {

	179 RTC_DCHECK_LT(0u, audio->num_channels());

	180 RTC_DCHECK_GE(2u, audio->num_channels());

	181 RTC_DCHECK_NE(0.f, dc_forgetting_factor_);

	182 RTC_DCHECK(sample_rate_hz_);

	183 data_dumper_->DumpWav("lc_input", audio->num_frames(),

	184 audio->channels_const_f()[0], *sample_rate_hz_, 1);

	185

	186 // Remove DC level.

	187 for (size_t k = 0; k < audio->num_channels(); ++k) {

	188 UpdateAndRemoveDcLevel(

	189 dc_forgetting_factor_, &dc_level_[k],

	190 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));

	191 }

	192

	193 SignalClassifier::SignalType signal_type;

	194 signal_classifier_.Analyze(*audio, &signal_type);

	195 int tmp = static_cast<int>(signal_type);

	196 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);

	197

	198 // Estimate the noise energy.

	199 float noise_energy =

	200 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));

	201

	202 // Estimate the overall signal peak level.

	203 float peak_level =

	204 peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio));

	205

	206 float saturating_gain = saturating_gain_estimator_.GetGain();

	207

	208 // Compute the new gain to apply.

	209 last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,

	210 saturating_gain, signal_type);

	211

	212 // Apply the gain to the signal.

	213 int num_saturations = gain_applier_.Process(last_gain_, audio);

	214

	215 // Estimate the gain that saturates the overall signal.

	216 saturating_gain_estimator_.Update(last_gain_, num_saturations);

	217

	218 // Update the metrics.

	219 metrics_.Update(peak_level, noise_energy, last_gain_);

	220

	221 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);

	222 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);

	223 data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level);

	224 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);

	225

	226 data_dumper_->DumpWav("lc_output", audio->num_frames(),

	227 audio->channels_f()[0], *sample_rate_hz_, 1);

	228 }

	229

	230 } // namespace webrtc

OLD	NEW