webrtc/modules/audio_processing/level_controller/level_controller.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Side by Side Diff: webrtc/modules/audio_processing/level_controller/level_controller.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Added reporting of metrics Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/level_controller/level_controller.h ('K') | « webrtc/modules/audio_processing/level_controller/level_controller.h ('k') | webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10

	11 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

	12

	13 #include <math.h>

	14 #include <algorithm>

	15 #include <numeric>

	16

	17 #include "webrtc/base/array_view.h"

	18 #include "webrtc/base/checks.h"

	19 #include "webrtc/modules/audio_processing/audio_buffer.h"

	20 #include "webrtc/modules/audio_processing/level_controller/gain_applier.h"

	21 #include "webrtc/modules/audio_processing/level_controller/gain_selector.h"

	22 #include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"

	23 #include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"

	24 #include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"

	25 #include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"

	26 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

	27 #include "webrtc/system_wrappers/include/metrics.h"

	28

	29 namespace webrtc {

	30 namespace {

	31

	32 void UpdateAndRemoveDcLevel(float forgetting_factor,

	33 float* dc_level,

	34 rtc::ArrayView<float> x) {

	35 RTC_DCHECK(!x.empty());

	36 float mean =

	37 std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size());

	38 dc_level += forgetting_factor (mean - *dc_level);

	39

	40 for (float& v : x) {

	41 v -= *dc_level;

	42 }

	43 }

	44

	45 float FrameEnergy(const AudioBuffer& audio) {

	46 float energy = 0.f;

	47 for (size_t k = 0; k < audio.num_channels(); ++k) {

	48 float channel_energy =

	49 std::accumulate(audio.channels_const_f()[k],

	50 audio.channels_const_f()[k] + audio.num_frames(), 0,

	51 [](float a, float b) -> float { return a + b * b; });

	52 energy = std::max(channel_energy, energy);

	53 }

	54 return energy;

	55 }

	56

	57 float PeakLevel(const AudioBuffer& audio) {

	58 float peak_level = 0.f;

	59 for (size_t k = 0; k < audio.num_channels(); ++k) {

	60 auto channel_peak_level = std::max_element(

	61 audio.channels_const_f()[k],

	62 audio.channels_const_f()[k] + audio.num_frames(),

	63 [](float a, float b) { return std::abs(a) < std::abs(b); });

	64 peak_level = std::max(*channel_peak_level, peak_level);

	65 }

	66 return peak_level;

	67 }

	68

	69 const int kMetricsFrameInterval = 1000;

	70

	71 } // namespace

	72

	73 int LevelController::instance_count_ = 0;

	74

	75 void LevelController::Metrics::Initialize(int sample_rate_hz) {

	76 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|

	77 sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|

	78 sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|

	79 sample_rate_hz == AudioProcessing::kSampleRate48kHz);

	80

	81 ResetEstimation();

	82 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);

	83 }

	84

	85 void LevelController::Metrics::ResetEstimation() {
	hlundin-webrtc 2016/06/29 08:56:28 Just call this method Reset. Just call this method Reset. peah-webrtc 2016/06/29 09:13:53 Done. Show quoted text On 2016/06/29 08:56:28, hlundin-webrtc wrote: > Just call this method Reset. Done.
	86 metrics_frame_counter_ = 0;

	87 gain_sum_ = 0.f;

	88 peak_level_sum_ = 0.f;

	89 noise_energy_sum_ = 0.f;

	90 max_gain_ = 0.f;

	91 max_peak_level_ = 0.f;

	92 max_noise_energy_ = 0.f;

	93 }

	94

	95 void LevelController::Metrics::Update(float peak_level,

	96 float noise_energy,

	97 float gain) {

	98 const float kdBFSOffset = 90.3090f;

	99 gain_sum_ += gain;

	100 peak_level_sum_ += peak_level;

	101 noise_energy_sum_ += noise_energy;

	102 max_gain_ = std::max(max_gain_, gain);

	103 max_peak_level_ = std::max(max_peak_level_, peak_level);

	104 max_noise_energy_ = std::max(max_noise_energy_, noise_energy);

	105

	106 ++metrics_frame_counter_;

	107 if (metrics_frame_counter_ == kMetricsFrameInterval) {

	108 RTC_HISTOGRAM_COUNTS(

	109 "WebRTC.Audio.LevelControlMaxNoisePower",
	hlundin-webrtc 2016/06/29 08:56:28 I think you should increase the readability of the I think you should increase the readability of the histogram names by adding a '.' after LeveLControl. In this case "WebRTC.Audio.LevelControl.MaxNoisePower" and similar below. peah-webrtc 2016/06/29 09:13:53 Done. Show quoted text On 2016/06/29 08:56:28, hlundin-webrtc wrote: > I think you should increase the readability of the histogram names by adding a > '.' after LeveLControl. In this case "WebRTC.Audio.LevelControl.MaxNoisePower" > and similar below. Done.
	110 static_cast<int>(10 *
	hlundin-webrtc 2016/06/29 08:56:28 This was an awkward line wrap. Did clang format pr This was an awkward line wrap. Did clang format produce this? peah-webrtc 2016/06/29 09:13:53 Yes, that was clang format. Sorry, I did not check Show quoted text On 2016/06/29 08:56:28, hlundin-webrtc wrote: > This was an awkward line wrap. Did clang format produce this? Yes, that was clang format. Sorry, I did not check the result. I now instead manually reformatted this part of the code. Could you please check that it looks ok?
	111 log10(max_noise_energy_ / frame_length_ + 1e-10f) -

	112 kdBFSOffset),

	113 -90, 0, 50);

	114 RTC_HISTOGRAM_COUNTS(

	115 "WebRTC.Audio.LevelControlAverageNoisePower",

	116 static_cast<int>(10 *

	117 log10(noise_energy_sum_ /

	118 (frame_length_ * kMetricsFrameInterval) +

	119 1e-10f) -

	120 kdBFSOffset),

	121 -90, 0, 50);

	122

	123 RTC_HISTOGRAM_COUNTS(

	124 "WebRTC.Audio.LevelControlMaxPeakLevel",

	125 static_cast<int>(10 *

	126 log10(max_peak_level_ * max_peak_level_ + 1e-10f) -

	127 kdBFSOffset),

	128 -90, 0, 50);

	129 RTC_HISTOGRAM_COUNTS(

	130 "WebRTC.Audio.LevelControlAveragePeakLevel",

	131 static_cast<int>(

	132 10 * log10(peak_level_sum_ * peak_level_sum_ /

	133 (kMetricsFrameInterval * kMetricsFrameInterval) +

	134 1e-10f) -

	135 kdBFSOffset),

	136 -90, 0, 50);

	137

	138 RTC_DCHECK_LE(1.f, max_gain_);

	139 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);

	140 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain",

	141 static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0,

	142 33, 30);

	143 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain",

	144 static_cast<int>(10 * log10(gain_sum_ * gain_sum_ /

	145 (kMetricsFrameInterval *

	146 kMetricsFrameInterval))),

	147 0, 33, 30);

	148 ResetEstimation();

	149 }

	150 }

	151

	152 LevelController::LevelController()

	153 : data_dumper_(new ApmDataDumper(instance_count_)),

	154 gain_applier_(data_dumper_.get()),

	155 signal_classifier_(data_dumper_.get()) {

	156 Initialize(AudioProcessing::kSampleRate48kHz);

	157 ++instance_count_;

	158 }

	159

	160 LevelController::~LevelController() {}

	161

	162 void LevelController::Initialize(int sample_rate_hz) {

	163 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz \|\|

	164 sample_rate_hz == AudioProcessing::kSampleRate16kHz \|\|

	165 sample_rate_hz == AudioProcessing::kSampleRate32kHz \|\|

	166 sample_rate_hz == AudioProcessing::kSampleRate48kHz);

	167 data_dumper_->InitiateNewSetOfRecordings();

	168 gain_selector_.Initialize(sample_rate_hz);

	169 gain_applier_.Initialize(sample_rate_hz);

	170 signal_classifier_.Initialize(sample_rate_hz);

	171 noise_level_estimator_.Initialize(sample_rate_hz);

	172 peak_level_estimator_.Initialize();

	173 saturating_gain_estimator_.Initialize();

	174 metrics_.Initialize(sample_rate_hz);

	175

	176 last_gain_ = 1.0f;

	177 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);

	178 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;

	179 }

	180

	181 void LevelController::Process(AudioBuffer* audio) {

	182 RTC_DCHECK_LT(0u, audio->num_channels());

	183 RTC_DCHECK_GE(2u, audio->num_channels());

	184 RTC_DCHECK_NE(0.f, dc_forgetting_factor_);

	185 RTC_DCHECK(sample_rate_hz_);

	186 data_dumper_->DumpWav("lc_input", audio->num_frames(),

	187 audio->channels_const_f()[0], *sample_rate_hz_, 1);

	188

	189 // Remove DC level.

	190 for (size_t k = 0; k < audio->num_channels(); ++k) {

	191 UpdateAndRemoveDcLevel(

	192 dc_forgetting_factor_, &dc_level_[k],

	193 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));

	194 }

	195

	196 SignalClassifier::SignalType signal_type;

	197 signal_classifier_.Analyze(*audio, &signal_type);

	198 int tmp = static_cast<int>(signal_type);

	199 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);

	200

	201 // Estimate the noise energy.

	202 float noise_energy =

	203 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));

	204

	205 // Estimate the overall signal peak level.

	206 float peak_level =

	207 peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio));

	208

	209 float saturating_gain = saturating_gain_estimator_.GetGain();

	210

	211 // Compute the new gain to apply.

	212 last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,

	213 saturating_gain, signal_type);

	214

	215 // Apply the gain to the signal.

	216 int num_saturations = gain_applier_.Process(last_gain_, audio);

	217

	218 // Estimate the gain that saturates the overall signal.

	219 saturating_gain_estimator_.Update(last_gain_, num_saturations);

	220

	221 // Update the metrics.

	222 metrics_.Update(peak_level, noise_energy, last_gain_);

	223

	224 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);

	225 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);

	226 data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level);

	227 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);

	228

	229 data_dumper_->DumpWav("lc_output", audio->num_frames(),

	230 audio->channels_f()[0], *sample_rate_hz_, 1);

	231 }

	232

	233 } // namespace webrtc

OLD	NEW