OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"

#include <math.h>
#include <algorithm>
#include <cmath>
#include <numeric>

#include "webrtc/base/array_view.h"
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/system_wrappers/include/metrics.h"
28 | |
29 namespace webrtc { | |
30 namespace { | |
31 | |
32 void UpdateAndRemoveDcLevel(float forgetting_factor, | |
33 float* dc_level, | |
34 rtc::ArrayView<float> x) { | |
35 RTC_DCHECK(!x.empty()); | |
36 float mean = | |
37 std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size()); | |
38 *dc_level += forgetting_factor * (mean - *dc_level); | |
39 | |
40 for (float& v : x) { | |
41 v -= *dc_level; | |
42 } | |
43 } | |
44 | |
45 float FrameEnergy(const AudioBuffer& audio) { | |
46 float energy = 0.f; | |
47 for (size_t k = 0; k < audio.num_channels(); ++k) { | |
48 float channel_energy = | |
49 std::accumulate(audio.channels_const_f()[k], | |
50 audio.channels_const_f()[k] + audio.num_frames(), 0, | |
51 [](float a, float b) -> float { return a + b * b; }); | |
52 energy = std::max(channel_energy, energy); | |
53 } | |
54 return energy; | |
55 } | |
56 | |
57 float PeakLevel(const AudioBuffer& audio) { | |
58 float peak_level = 0.f; | |
59 for (size_t k = 0; k < audio.num_channels(); ++k) { | |
60 auto channel_peak_level = std::max_element( | |
61 audio.channels_const_f()[k], | |
62 audio.channels_const_f()[k] + audio.num_frames(), | |
63 [](float a, float b) { return std::abs(a) < std::abs(b); }); | |
64 peak_level = std::max(*channel_peak_level, peak_level); | |
65 } | |
66 return peak_level; | |
67 } | |
68 | |
// Number of frames accumulated between two consecutive histogram reports.
constexpr int kMetricsFrameInterval = 1000;
70 | |
71 } // namespace | |
72 | |
73 int LevelController::instance_count_ = 0; | |
74 | |
75 void LevelController::Metrics::Initialize(int sample_rate_hz) { | |
76 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || | |
77 sample_rate_hz == AudioProcessing::kSampleRate16kHz || | |
78 sample_rate_hz == AudioProcessing::kSampleRate32kHz || | |
79 sample_rate_hz == AudioProcessing::kSampleRate48kHz); | |
80 | |
81 ResetEstimation(); | |
82 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); | |
83 } | |
84 | |
85 void LevelController::Metrics::ResetEstimation() { | |
hlundin-webrtc
2016/06/29 08:56:28
Just call this method Reset.
peah-webrtc
2016/06/29 09:13:53
Done.
| |
86 metrics_frame_counter_ = 0; | |
87 gain_sum_ = 0.f; | |
88 peak_level_sum_ = 0.f; | |
89 noise_energy_sum_ = 0.f; | |
90 max_gain_ = 0.f; | |
91 max_peak_level_ = 0.f; | |
92 max_noise_energy_ = 0.f; | |
93 } | |
94 | |
95 void LevelController::Metrics::Update(float peak_level, | |
96 float noise_energy, | |
97 float gain) { | |
98 const float kdBFSOffset = 90.3090f; | |
99 gain_sum_ += gain; | |
100 peak_level_sum_ += peak_level; | |
101 noise_energy_sum_ += noise_energy; | |
102 max_gain_ = std::max(max_gain_, gain); | |
103 max_peak_level_ = std::max(max_peak_level_, peak_level); | |
104 max_noise_energy_ = std::max(max_noise_energy_, noise_energy); | |
105 | |
106 ++metrics_frame_counter_; | |
107 if (metrics_frame_counter_ == kMetricsFrameInterval) { | |
108 RTC_HISTOGRAM_COUNTS( | |
109 "WebRTC.Audio.LevelControlMaxNoisePower", | |
hlundin-webrtc
2016/06/29 08:56:28
I think you should increase the readability of the
peah-webrtc
2016/06/29 09:13:53
Done.
| |
110 static_cast<int>(10 * | |
hlundin-webrtc
2016/06/29 08:56:28
This was an awkward line wrap. Did clang format pr
peah-webrtc
2016/06/29 09:13:53
Yes, that was clang format. Sorry, I did not check
| |
111 log10(max_noise_energy_ / frame_length_ + 1e-10f) - | |
112 kdBFSOffset), | |
113 -90, 0, 50); | |
114 RTC_HISTOGRAM_COUNTS( | |
115 "WebRTC.Audio.LevelControlAverageNoisePower", | |
116 static_cast<int>(10 * | |
117 log10(noise_energy_sum_ / | |
118 (frame_length_ * kMetricsFrameInterval) + | |
119 1e-10f) - | |
120 kdBFSOffset), | |
121 -90, 0, 50); | |
122 | |
123 RTC_HISTOGRAM_COUNTS( | |
124 "WebRTC.Audio.LevelControlMaxPeakLevel", | |
125 static_cast<int>(10 * | |
126 log10(max_peak_level_ * max_peak_level_ + 1e-10f) - | |
127 kdBFSOffset), | |
128 -90, 0, 50); | |
129 RTC_HISTOGRAM_COUNTS( | |
130 "WebRTC.Audio.LevelControlAveragePeakLevel", | |
131 static_cast<int>( | |
132 10 * log10(peak_level_sum_ * peak_level_sum_ / | |
133 (kMetricsFrameInterval * kMetricsFrameInterval) + | |
134 1e-10f) - | |
135 kdBFSOffset), | |
136 -90, 0, 50); | |
137 | |
138 RTC_DCHECK_LE(1.f, max_gain_); | |
139 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); | |
140 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain", | |
141 static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0, | |
142 33, 30); | |
143 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain", | |
144 static_cast<int>(10 * log10(gain_sum_ * gain_sum_ / | |
145 (kMetricsFrameInterval * | |
146 kMetricsFrameInterval))), | |
147 0, 33, 30); | |
148 ResetEstimation(); | |
149 } | |
150 } | |
151 | |
152 LevelController::LevelController() | |
153 : data_dumper_(new ApmDataDumper(instance_count_)), | |
154 gain_applier_(data_dumper_.get()), | |
155 signal_classifier_(data_dumper_.get()) { | |
156 Initialize(AudioProcessing::kSampleRate48kHz); | |
157 ++instance_count_; | |
158 } | |
159 | |
160 LevelController::~LevelController() {} | |
161 | |
162 void LevelController::Initialize(int sample_rate_hz) { | |
163 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || | |
164 sample_rate_hz == AudioProcessing::kSampleRate16kHz || | |
165 sample_rate_hz == AudioProcessing::kSampleRate32kHz || | |
166 sample_rate_hz == AudioProcessing::kSampleRate48kHz); | |
167 data_dumper_->InitiateNewSetOfRecordings(); | |
168 gain_selector_.Initialize(sample_rate_hz); | |
169 gain_applier_.Initialize(sample_rate_hz); | |
170 signal_classifier_.Initialize(sample_rate_hz); | |
171 noise_level_estimator_.Initialize(sample_rate_hz); | |
172 peak_level_estimator_.Initialize(); | |
173 saturating_gain_estimator_.Initialize(); | |
174 metrics_.Initialize(sample_rate_hz); | |
175 | |
176 last_gain_ = 1.0f; | |
177 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz); | |
178 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; | |
179 } | |
180 | |
181 void LevelController::Process(AudioBuffer* audio) { | |
182 RTC_DCHECK_LT(0u, audio->num_channels()); | |
183 RTC_DCHECK_GE(2u, audio->num_channels()); | |
184 RTC_DCHECK_NE(0.f, dc_forgetting_factor_); | |
185 RTC_DCHECK(sample_rate_hz_); | |
186 data_dumper_->DumpWav("lc_input", audio->num_frames(), | |
187 audio->channels_const_f()[0], *sample_rate_hz_, 1); | |
188 | |
189 // Remove DC level. | |
190 for (size_t k = 0; k < audio->num_channels(); ++k) { | |
191 UpdateAndRemoveDcLevel( | |
192 dc_forgetting_factor_, &dc_level_[k], | |
193 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); | |
194 } | |
195 | |
196 SignalClassifier::SignalType signal_type; | |
197 signal_classifier_.Analyze(*audio, &signal_type); | |
198 int tmp = static_cast<int>(signal_type); | |
199 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); | |
200 | |
201 // Estimate the noise energy. | |
202 float noise_energy = | |
203 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); | |
204 | |
205 // Estimate the overall signal peak level. | |
206 float peak_level = | |
207 peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio)); | |
208 | |
209 float saturating_gain = saturating_gain_estimator_.GetGain(); | |
210 | |
211 // Compute the new gain to apply. | |
212 last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy, | |
213 saturating_gain, signal_type); | |
214 | |
215 // Apply the gain to the signal. | |
216 int num_saturations = gain_applier_.Process(last_gain_, audio); | |
217 | |
218 // Estimate the gain that saturates the overall signal. | |
219 saturating_gain_estimator_.Update(last_gain_, num_saturations); | |
220 | |
221 // Update the metrics. | |
222 metrics_.Update(peak_level, noise_energy, last_gain_); | |
223 | |
224 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); | |
225 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); | |
226 data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level); | |
227 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); | |
228 | |
229 data_dumper_->DumpWav("lc_output", audio->num_frames(), | |
230 audio->channels_f()[0], *sample_rate_hz_, 1); | |
231 } | |
232 | |
233 } // namespace webrtc | |
OLD | NEW |