 Chromium Code Reviews
 Chromium Code Reviews Issue 2090583002:
  New module for the adaptive level controlling functionality in the audio processing module  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master
    
  
    Issue 2090583002:
  New module for the adaptive level controlling functionality in the audio processing module  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "webrtc/modules/audio_processing/level_controller/level_controller.h" | |
| 12 | |
| 13 #include <math.h> | |
| 14 #include <algorithm> | |
| 15 #include <numeric> | |
| 16 | |
| 17 #include "webrtc/base/array_view.h" | |
| 18 #include "webrtc/base/checks.h" | |
| 19 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
| 20 #include "webrtc/modules/audio_processing/level_controller/gain_applier.h" | |
| 21 #include "webrtc/modules/audio_processing/level_controller/gain_selector.h" | |
| 22 #include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h" | |
| 23 #include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h" | |
| 24 #include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h" | |
| 25 #include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" | |
| 26 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" | |
| 27 #include "webrtc/system_wrappers/include/metrics.h" | |
| 28 | |
| 29 namespace webrtc { | |
| 30 namespace { | |
| 31 | |
| // Updates the running DC estimate |*dc_level| from the mean of |x| and then | |
| // subtracts the updated estimate from every sample of |x| in place. | |
| // |forgetting_factor| weights how far the estimate moves towards the frame | |
| // mean. |x| must be non-empty. | |
| 32 void UpdateAndRemoveDcLevel(float forgetting_factor, | |
| 33 float* dc_level, | |
| 34 rtc::ArrayView<float> x) { | |
| 35 RTC_DCHECK(!x.empty()); | |
| // The initial value must be a float: std::accumulate uses the type of the | |
| // initial value as its accumulator, so an int 0 would truncate the sum at | |
| // every step. | |
| 36 float mean = | |
| 37 std::accumulate(x.begin(), x.end(), 0.f) / static_cast<float>(x.size()); | |
| 38 *dc_level += forgetting_factor * (mean - *dc_level); | |
| 39 | |
| 40 for (float& v : x) { | |
| 41 v -= *dc_level; | |
| 42 } | |
| 43 } | |
| 44 | |
| // Returns the largest per-channel energy (sum of squared samples) found in | |
| // |audio|; returns 0 if there are no channels. | |
| 45 float FrameEnergy(const AudioBuffer& audio) { | |
| 46 float energy = 0.f; | |
| 47 for (size_t k = 0; k < audio.num_channels(); ++k) { | |
| // The initial value is 0.f (not 0) so that the accumulator is a float; with | |
| // an int initial value each partial sum would be truncated to an integer. | |
| 48 float channel_energy = | |
| 49 std::accumulate(audio.channels_const_f()[k], | |
| 50 audio.channels_const_f()[k] + audio.num_frames(), 0.f, | |
| 51 [](float a, float b) -> float { return a + b * b; }); | |
| 52 energy = std::max(channel_energy, energy); | |
| 53 } | |
| 54 return energy; | |
| 55 } | |
| 56 | |
| // Returns the largest absolute sample value over all channels of |audio|; | |
| // returns 0 if there are no channels. | |
| // NOTE(review): assumes audio.num_frames() > 0, otherwise the iterator | |
| // returned by std::max_element must not be dereferenced - confirm. | |
| 57 float PeakLevel(const AudioBuffer& audio) { | |
| 58 float peak_level = 0.f; | |
| 59 for (size_t k = 0; k < audio.num_channels(); ++k) { | |
| 60 auto channel_peak_level = std::max_element( | |
| 61 audio.channels_const_f()[k], | |
| 62 audio.channels_const_f()[k] + audio.num_frames(), | |
| 63 [](float a, float b) { return std::abs(a) < std::abs(b); }); | |
| // The comparator selects the sample with the largest magnitude, which may | |
| // be negative; take its absolute value so that a negative peak is not | |
| // reported as 0. | |
| 64 peak_level = std::max(std::abs(*channel_peak_level), peak_level); | |
| 65 } | |
| 66 return peak_level; | |
| 67 } | |
| 68 | |
| // Number of Metrics::Update() calls accumulated before the histograms are | |
| // reported and the accumulators are reset. | |
| 69 const int kMetricsFrameInterval = 1000; | |
| 70 | |
| 71 } // namespace | |
| 72 | |
| // Counter incremented by every LevelController constructor; its current | |
| // value is handed to the ApmDataDumper created for the new instance. | |
| 73 int LevelController::instance_count_ = 0; | |
| 74 | |
| // Resets all metric accumulators and derives the frame length from | |
| // |sample_rate_hz|, which must be 8, 16, 32 or 48 kHz. | |
| 75 void LevelController::Metrics::Initialize(int sample_rate_hz) { | |
| 76 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || | |
| 77 sample_rate_hz == AudioProcessing::kSampleRate16kHz || | |
| 78 sample_rate_hz == AudioProcessing::kSampleRate32kHz || | |
| 79 sample_rate_hz == AudioProcessing::kSampleRate48kHz); | |
| 80 | |
| 81 ResetEstimation(); | |
| // One hundredth of the sample rate, i.e. the number of samples in 10 ms. | |
| 82 frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); | |
| 83 } | |
| 84 | |
| // Clears the frame counter and all running sums and maxima so that a new | |
| // metrics interval starts from zero. | |
| 85 void LevelController::Metrics::ResetEstimation() { | |

hlundin-webrtc
2016/06/29 08:56:28
Just call this method Reset.
 
peah-webrtc
2016/06/29 09:13:53
Done.
 | |
| 86 metrics_frame_counter_ = 0; | |
| 87 gain_sum_ = 0.f; | |
| 88 peak_level_sum_ = 0.f; | |
| 89 noise_energy_sum_ = 0.f; | |
| 90 max_gain_ = 0.f; | |
| 91 max_peak_level_ = 0.f; | |
| 92 max_noise_energy_ = 0.f; | |
| 93 } | |
| 94 | |
| // Accumulates the per-frame |peak_level|, |noise_energy| and |gain| into | |
| // running sums and maxima. Once kMetricsFrameInterval frames have been | |
| // accumulated, the maximum and average noise power, peak level and gain are | |
| // reported as histograms (as 10 * log10 of the respective power) and the | |
| // accumulators are reset. | |
| 95 void LevelController::Metrics::Update(float peak_level, | |
| 96 float noise_energy, | |
| 97 float gain) { | |
| // NOTE(review): 90.3090 is approximately 20 * log10(32768); presumably this | |
| // offsets a 16-bit sample scale to dBFS - confirm. | |
| 98 const float kdBFSOffset = 90.3090f; | |
| 99 gain_sum_ += gain; | |
| 100 peak_level_sum_ += peak_level; | |
| 101 noise_energy_sum_ += noise_energy; | |
| 102 max_gain_ = std::max(max_gain_, gain); | |
| 103 max_peak_level_ = std::max(max_peak_level_, peak_level); | |
| 104 max_noise_energy_ = std::max(max_noise_energy_, noise_energy); | |
| 105 | |
| 106 ++metrics_frame_counter_; | |
| 107 if (metrics_frame_counter_ == kMetricsFrameInterval) { | |
| // Noise energies are divided by the frame length before the conversion; the | |
| // 1e-10f term keeps the log10 argument strictly positive. | |
| 108 RTC_HISTOGRAM_COUNTS( | |
| 109 "WebRTC.Audio.LevelControlMaxNoisePower", | |

hlundin-webrtc
2016/06/29 08:56:28
I think you should increase the readability of the
 
peah-webrtc
2016/06/29 09:13:53
Done.
 | |
| 110 static_cast<int>(10 * | |
| 

hlundin-webrtc
2016/06/29 08:56:28
This was an awkward line wrap. Did clang format pr
 
peah-webrtc
2016/06/29 09:13:53
Yes, that was clang format. Sorry, I did not check
 | |
| 111 log10(max_noise_energy_ / frame_length_ + 1e-10f) - | |
| 112 kdBFSOffset), | |
| 113 -90, 0, 50); | |
| 114 RTC_HISTOGRAM_COUNTS( | |
| 115 "WebRTC.Audio.LevelControlAverageNoisePower", | |
| 116 static_cast<int>(10 * | |
| 117 log10(noise_energy_sum_ / | |
| 118 (frame_length_ * kMetricsFrameInterval) + | |
| 119 1e-10f) - | |
| 120 kdBFSOffset), | |
| 121 -90, 0, 50); | |
| 122 | |
| // Peak levels are amplitudes, so they are squared before 10 * log10. | |
| 123 RTC_HISTOGRAM_COUNTS( | |
| 124 "WebRTC.Audio.LevelControlMaxPeakLevel", | |
| 125 static_cast<int>(10 * | |
| 126 log10(max_peak_level_ * max_peak_level_ + 1e-10f) - | |
| 127 kdBFSOffset), | |
| 128 -90, 0, 50); | |
| 129 RTC_HISTOGRAM_COUNTS( | |
| 130 "WebRTC.Audio.LevelControlAveragePeakLevel", | |
| 131 static_cast<int>( | |
| 132 10 * log10(peak_level_sum_ * peak_level_sum_ / | |
| 133 (kMetricsFrameInterval * kMetricsFrameInterval) + | |
| 134 1e-10f) - | |
| 135 kdBFSOffset), | |
| 136 -90, 0, 50); | |
| 137 | |
| // The gain statistics are checked to be at least 1 before they are reported, | |
| // so the logged gain in dB is non-negative. | |
| 138 RTC_DCHECK_LE(1.f, max_gain_); | |
| 139 RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); | |
| 140 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain", | |
| 141 static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0, | |
| 142 33, 30); | |
| 143 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain", | |
| 144 static_cast<int>(10 * log10(gain_sum_ * gain_sum_ / | |
| 145 (kMetricsFrameInterval * | |
| 146 kMetricsFrameInterval))), | |
| 147 0, 33, 30); | |
| // Start a new accumulation interval. | |
| 148 ResetEstimation(); | |
| 149 } | |
| 150 } | |
| 151 | |
| // Creates the data dumper (tagged with the current instance count), hands it | |
| // to the gain applier and the signal classifier, initializes the controller | |
| // for 48 kHz and then increments the instance count. | |
| 152 LevelController::LevelController() | |
| 153 : data_dumper_(new ApmDataDumper(instance_count_)), | |
| 154 gain_applier_(data_dumper_.get()), | |
| 155 signal_classifier_(data_dumper_.get()) { | |
| 156 Initialize(AudioProcessing::kSampleRate48kHz); | |
| 157 ++instance_count_; | |
| 158 } | |
| 159 | |
| // Empty body; no explicit cleanup is performed here. | |
| 160 LevelController::~LevelController() {} | |
| 161 | |
| // (Re)initializes all sub-components and the metrics for |sample_rate_hz|, | |
| // which must be 8, 16, 32 or 48 kHz, starts a new set of data dumper | |
| // recordings and resets the last applied gain to 1. | |
| 162 void LevelController::Initialize(int sample_rate_hz) { | |
| 163 RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || | |
| 164 sample_rate_hz == AudioProcessing::kSampleRate16kHz || | |
| 165 sample_rate_hz == AudioProcessing::kSampleRate32kHz || | |
| 166 sample_rate_hz == AudioProcessing::kSampleRate48kHz); | |
| 167 data_dumper_->InitiateNewSetOfRecordings(); | |
| 168 gain_selector_.Initialize(sample_rate_hz); | |
| 169 gain_applier_.Initialize(sample_rate_hz); | |
| 170 signal_classifier_.Initialize(sample_rate_hz); | |
| 171 noise_level_estimator_.Initialize(sample_rate_hz); | |
| 172 peak_level_estimator_.Initialize(); | |
| 173 saturating_gain_estimator_.Initialize(); | |
| 174 metrics_.Initialize(sample_rate_hz); | |
| 175 | |
| 176 last_gain_ = 1.0f; | |
| 177 sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz); | |
| // The DC forgetting factor is 0.01 at 48 kHz and scales linearly with the | |
| // sample rate. | |
| 178 dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; | |
| 179 } | |
| 180 | |
| // Processes one frame of |audio| in place: removes the DC level of each | |
| // channel, classifies the signal, estimates noise energy and peak level, | |
| // selects and applies a new gain, updates the saturating-gain estimate and | |
| // the metrics, and dumps debug data. Requires 1 or 2 channels and a prior | |
| // call to Initialize() (a sample rate must be set). | |
| 181 void LevelController::Process(AudioBuffer* audio) { | |
| 182 RTC_DCHECK_LT(0u, audio->num_channels()); | |
| 183 RTC_DCHECK_GE(2u, audio->num_channels()); | |
| 184 RTC_DCHECK_NE(0.f, dc_forgetting_factor_); | |
| 185 RTC_DCHECK(sample_rate_hz_); | |
| // Only the first channel is passed to the wav dump. | |
| 186 data_dumper_->DumpWav("lc_input", audio->num_frames(), | |
| 187 audio->channels_const_f()[0], *sample_rate_hz_, 1); | |
| 188 | |
| 189 // Remove DC level. | |
| 190 for (size_t k = 0; k < audio->num_channels(); ++k) { | |
| 191 UpdateAndRemoveDcLevel( | |
| 192 dc_forgetting_factor_, &dc_level_[k], | |
| 193 rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); | |
| 194 } | |
| 195 | |
| // Classify the DC-removed frame; the type is dumped as an int. | |
| 196 SignalClassifier::SignalType signal_type; | |
| 197 signal_classifier_.Analyze(*audio, &signal_type); | |
| 198 int tmp = static_cast<int>(signal_type); | |
| 199 data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); | |
| 200 | |
| 201 // Estimate the noise energy. | |
| 202 float noise_energy = | |
| 203 noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); | |
| 204 | |
| 205 // Estimate the overall signal peak level. | |
| 206 float peak_level = | |
| 207 peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio)); | |
| 208 | |
| // The saturating gain is read before this frame's Update() call below. | |
| 209 float saturating_gain = saturating_gain_estimator_.GetGain(); | |
| 210 | |
| 211 // Compute the new gain to apply. | |
| 212 last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy, | |
| 213 saturating_gain, signal_type); | |
| 214 | |
| 215 // Apply the gain to the signal. | |
| 216 int num_saturations = gain_applier_.Process(last_gain_, audio); | |
| 217 | |
| 218 // Estimate the gain that saturates the overall signal. | |
| 219 saturating_gain_estimator_.Update(last_gain_, num_saturations); | |
| 220 | |
| 221 // Update the metrics. | |
| 222 metrics_.Update(peak_level, noise_energy, last_gain_); | |
| 223 | |
| 224 data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); | |
| 225 data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); | |
| 226 data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level); | |
| 227 data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); | |
| 228 | |
| 229 data_dumper_->DumpWav("lc_output", audio->num_frames(), | |
| 230 audio->channels_f()[0], *sample_rate_hz_, 1); | |
| 231 } | |
| 232 | |
| 233 } // namespace webrtc | |
| OLD | NEW |