Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(384)

Side by Side Diff: webrtc/modules/audio_processing/level_controller/level_controller.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Added reporting of metrics Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"

#include <math.h>
#include <algorithm>
#include <cmath>
#include <numeric>

#include "webrtc/base/array_view.h"
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/system_wrappers/include/metrics.h"
28
29 namespace webrtc {
30 namespace {
31
32 void UpdateAndRemoveDcLevel(float forgetting_factor,
33 float* dc_level,
34 rtc::ArrayView<float> x) {
35 RTC_DCHECK(!x.empty());
36 float mean =
37 std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size());
38 *dc_level += forgetting_factor * (mean - *dc_level);
39
40 for (float& v : x) {
41 v -= *dc_level;
42 }
43 }
44
45 float FrameEnergy(const AudioBuffer& audio) {
46 float energy = 0.f;
47 for (size_t k = 0; k < audio.num_channels(); ++k) {
48 float channel_energy =
49 std::accumulate(audio.channels_const_f()[k],
50 audio.channels_const_f()[k] + audio.num_frames(), 0,
51 [](float a, float b) -> float { return a + b * b; });
52 energy = std::max(channel_energy, energy);
53 }
54 return energy;
55 }
56
57 float PeakLevel(const AudioBuffer& audio) {
58 float peak_level = 0.f;
59 for (size_t k = 0; k < audio.num_channels(); ++k) {
60 auto channel_peak_level = std::max_element(
61 audio.channels_const_f()[k],
62 audio.channels_const_f()[k] + audio.num_frames(),
63 [](float a, float b) { return std::abs(a) < std::abs(b); });
64 peak_level = std::max(*channel_peak_level, peak_level);
65 }
66 return peak_level;
67 }
68
// Number of Metrics::Update() calls that are accumulated before the
// histograms are reported and the accumulated statistics are reset.
constexpr int kMetricsFrameInterval = 1000;
70
71 } // namespace
72
73 int LevelController::instance_count_ = 0;
74
75 void LevelController::Metrics::Initialize(int sample_rate_hz) {
76 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
77 sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
78 sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
79 sample_rate_hz == AudioProcessing::kSampleRate48kHz);
80
81 ResetEstimation();
82 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
83 }
84
85 void LevelController::Metrics::ResetEstimation() {
hlundin-webrtc 2016/06/29 08:56:28 Just call this method Reset.
peah-webrtc 2016/06/29 09:13:53 Done.
86 metrics_frame_counter_ = 0;
87 gain_sum_ = 0.f;
88 peak_level_sum_ = 0.f;
89 noise_energy_sum_ = 0.f;
90 max_gain_ = 0.f;
91 max_peak_level_ = 0.f;
92 max_noise_energy_ = 0.f;
93 }
94
95 void LevelController::Metrics::Update(float peak_level,
96 float noise_energy,
97 float gain) {
98 const float kdBFSOffset = 90.3090f;
99 gain_sum_ += gain;
100 peak_level_sum_ += peak_level;
101 noise_energy_sum_ += noise_energy;
102 max_gain_ = std::max(max_gain_, gain);
103 max_peak_level_ = std::max(max_peak_level_, peak_level);
104 max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
105
106 ++metrics_frame_counter_;
107 if (metrics_frame_counter_ == kMetricsFrameInterval) {
108 RTC_HISTOGRAM_COUNTS(
109 "WebRTC.Audio.LevelControlMaxNoisePower",
hlundin-webrtc 2016/06/29 08:56:28 I think you should increase the readability of the
peah-webrtc 2016/06/29 09:13:53 Done.
110 static_cast<int>(10 *
hlundin-webrtc 2016/06/29 08:56:28 This was an awkward line wrap. Did clang format pr
peah-webrtc 2016/06/29 09:13:53 Yes, that was clang format. Sorry, I did not check
111 log10(max_noise_energy_ / frame_length_ + 1e-10f) -
112 kdBFSOffset),
113 -90, 0, 50);
114 RTC_HISTOGRAM_COUNTS(
115 "WebRTC.Audio.LevelControlAverageNoisePower",
116 static_cast<int>(10 *
117 log10(noise_energy_sum_ /
118 (frame_length_ * kMetricsFrameInterval) +
119 1e-10f) -
120 kdBFSOffset),
121 -90, 0, 50);
122
123 RTC_HISTOGRAM_COUNTS(
124 "WebRTC.Audio.LevelControlMaxPeakLevel",
125 static_cast<int>(10 *
126 log10(max_peak_level_ * max_peak_level_ + 1e-10f) -
127 kdBFSOffset),
128 -90, 0, 50);
129 RTC_HISTOGRAM_COUNTS(
130 "WebRTC.Audio.LevelControlAveragePeakLevel",
131 static_cast<int>(
132 10 * log10(peak_level_sum_ * peak_level_sum_ /
133 (kMetricsFrameInterval * kMetricsFrameInterval) +
134 1e-10f) -
135 kdBFSOffset),
136 -90, 0, 50);
137
138 RTC_DCHECK_LE(1.f, max_gain_);
139 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
140 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain",
141 static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0,
142 33, 30);
143 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain",
144 static_cast<int>(10 * log10(gain_sum_ * gain_sum_ /
145 (kMetricsFrameInterval *
146 kMetricsFrameInterval))),
147 0, 33, 30);
148 ResetEstimation();
149 }
150 }
151
152 LevelController::LevelController()
153 : data_dumper_(new ApmDataDumper(instance_count_)),
154 gain_applier_(data_dumper_.get()),
155 signal_classifier_(data_dumper_.get()) {
156 Initialize(AudioProcessing::kSampleRate48kHz);
157 ++instance_count_;
158 }
159
160 LevelController::~LevelController() {}
161
162 void LevelController::Initialize(int sample_rate_hz) {
163 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
164 sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
165 sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
166 sample_rate_hz == AudioProcessing::kSampleRate48kHz);
167 data_dumper_->InitiateNewSetOfRecordings();
168 gain_selector_.Initialize(sample_rate_hz);
169 gain_applier_.Initialize(sample_rate_hz);
170 signal_classifier_.Initialize(sample_rate_hz);
171 noise_level_estimator_.Initialize(sample_rate_hz);
172 peak_level_estimator_.Initialize();
173 saturating_gain_estimator_.Initialize();
174 metrics_.Initialize(sample_rate_hz);
175
176 last_gain_ = 1.0f;
177 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
178 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
179 }
180
181 void LevelController::Process(AudioBuffer* audio) {
182 RTC_DCHECK_LT(0u, audio->num_channels());
183 RTC_DCHECK_GE(2u, audio->num_channels());
184 RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
185 RTC_DCHECK(sample_rate_hz_);
186 data_dumper_->DumpWav("lc_input", audio->num_frames(),
187 audio->channels_const_f()[0], *sample_rate_hz_, 1);
188
189 // Remove DC level.
190 for (size_t k = 0; k < audio->num_channels(); ++k) {
191 UpdateAndRemoveDcLevel(
192 dc_forgetting_factor_, &dc_level_[k],
193 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
194 }
195
196 SignalClassifier::SignalType signal_type;
197 signal_classifier_.Analyze(*audio, &signal_type);
198 int tmp = static_cast<int>(signal_type);
199 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp);
200
201 // Estimate the noise energy.
202 float noise_energy =
203 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio));
204
205 // Estimate the overall signal peak level.
206 float peak_level =
207 peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio));
208
209 float saturating_gain = saturating_gain_estimator_.GetGain();
210
211 // Compute the new gain to apply.
212 last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,
213 saturating_gain, signal_type);
214
215 // Apply the gain to the signal.
216 int num_saturations = gain_applier_.Process(last_gain_, audio);
217
218 // Estimate the gain that saturates the overall signal.
219 saturating_gain_estimator_.Update(last_gain_, num_saturations);
220
221 // Update the metrics.
222 metrics_.Update(peak_level, noise_energy, last_gain_);
223
224 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
225 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
226 data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level);
227 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
228
229 data_dumper_->DumpWav("lc_output", audio->num_frames(),
230 audio->channels_f()[0], *sample_rate_hz_, 1);
231 }
232
233 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698