Chromium Code Reviews| Index: webrtc/modules/audio_processing/level_controller/level_controller.cc |
| diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.cc b/webrtc/modules/audio_processing/level_controller/level_controller.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..d04daf2d4e8cce8248e0a1ff15d9db597ff9de5c |
| --- /dev/null |
| +++ b/webrtc/modules/audio_processing/level_controller/level_controller.cc |
| @@ -0,0 +1,233 @@ |
| +/* |
| + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| + |
| +#include "webrtc/modules/audio_processing/level_controller/level_controller.h" |
| + |
| +#include <math.h> |
| +#include <algorithm> |
| +#include <numeric> |
| + |
| +#include "webrtc/base/array_view.h" |
| +#include "webrtc/base/checks.h" |
| +#include "webrtc/modules/audio_processing/audio_buffer.h" |
| +#include "webrtc/modules/audio_processing/level_controller/gain_applier.h" |
| +#include "webrtc/modules/audio_processing/level_controller/gain_selector.h" |
| +#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h" |
| +#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h" |
| +#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h" |
| +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" |
| +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" |
| +#include "webrtc/system_wrappers/include/metrics.h" |
| + |
| +namespace webrtc { |
| +namespace { |
| + |
| +void UpdateAndRemoveDcLevel(float forgetting_factor, |
| + float* dc_level, |
| + rtc::ArrayView<float> x) { |
| + RTC_DCHECK(!x.empty()); |
| + float mean = |
| + std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size()); |
| + *dc_level += forgetting_factor * (mean - *dc_level); |
| + |
| + for (float& v : x) { |
| + v -= *dc_level; |
| + } |
| +} |
| + |
| +float FrameEnergy(const AudioBuffer& audio) { |
| + float energy = 0.f; |
| + for (size_t k = 0; k < audio.num_channels(); ++k) { |
| + float channel_energy = |
| + std::accumulate(audio.channels_const_f()[k], |
| + audio.channels_const_f()[k] + audio.num_frames(), 0, |
| + [](float a, float b) -> float { return a + b * b; }); |
| + energy = std::max(channel_energy, energy); |
| + } |
| + return energy; |
| +} |
| + |
| +float PeakLevel(const AudioBuffer& audio) { |
| + float peak_level = 0.f; |
| + for (size_t k = 0; k < audio.num_channels(); ++k) { |
| + auto channel_peak_level = std::max_element( |
| + audio.channels_const_f()[k], |
| + audio.channels_const_f()[k] + audio.num_frames(), |
| + [](float a, float b) { return std::abs(a) < std::abs(b); }); |
| + peak_level = std::max(*channel_peak_level, peak_level); |
| + } |
| + return peak_level; |
| +} |
| + |
| +const int kMetricsFrameInterval = 1000; |
| + |
| +} // namespace |
| + |
| +int LevelController::instance_count_ = 0; |
| + |
| +void LevelController::Metrics::Initialize(int sample_rate_hz) { |
| + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate16kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate32kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate48kHz); |
| + |
| + ResetEstimation(); |
| + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); |
| +} |
| + |
| +void LevelController::Metrics::ResetEstimation() { |
|
hlundin-webrtc
2016/06/29 08:56:28
Just call this method Reset.
peah-webrtc
2016/06/29 09:13:53
Done.
|
| + metrics_frame_counter_ = 0; |
| + gain_sum_ = 0.f; |
| + peak_level_sum_ = 0.f; |
| + noise_energy_sum_ = 0.f; |
| + max_gain_ = 0.f; |
| + max_peak_level_ = 0.f; |
| + max_noise_energy_ = 0.f; |
| +} |
| + |
| +void LevelController::Metrics::Update(float peak_level, |
| + float noise_energy, |
| + float gain) { |
| + const float kdBFSOffset = 90.3090f; |
| + gain_sum_ += gain; |
| + peak_level_sum_ += peak_level; |
| + noise_energy_sum_ += noise_energy; |
| + max_gain_ = std::max(max_gain_, gain); |
| + max_peak_level_ = std::max(max_peak_level_, peak_level); |
| + max_noise_energy_ = std::max(max_noise_energy_, noise_energy); |
| + |
| + ++metrics_frame_counter_; |
| + if (metrics_frame_counter_ == kMetricsFrameInterval) { |
| + RTC_HISTOGRAM_COUNTS( |
| + "WebRTC.Audio.LevelControlMaxNoisePower", |
|
hlundin-webrtc
2016/06/29 08:56:28
I think you should increase the readability of the
peah-webrtc
2016/06/29 09:13:53
Done.
|
| + static_cast<int>(10 * |
|
hlundin-webrtc
2016/06/29 08:56:28
This was an awkward line wrap. Did clang format pr
peah-webrtc
2016/06/29 09:13:53
Yes, that was clang format. Sorry, I did not check
|
| + log10(max_noise_energy_ / frame_length_ + 1e-10f) - |
| + kdBFSOffset), |
| + -90, 0, 50); |
| + RTC_HISTOGRAM_COUNTS( |
| + "WebRTC.Audio.LevelControlAverageNoisePower", |
| + static_cast<int>(10 * |
| + log10(noise_energy_sum_ / |
| + (frame_length_ * kMetricsFrameInterval) + |
| + 1e-10f) - |
| + kdBFSOffset), |
| + -90, 0, 50); |
| + |
| + RTC_HISTOGRAM_COUNTS( |
| + "WebRTC.Audio.LevelControlMaxPeakLevel", |
| + static_cast<int>(10 * |
| + log10(max_peak_level_ * max_peak_level_ + 1e-10f) - |
| + kdBFSOffset), |
| + -90, 0, 50); |
| + RTC_HISTOGRAM_COUNTS( |
| + "WebRTC.Audio.LevelControlAveragePeakLevel", |
| + static_cast<int>( |
| + 10 * log10(peak_level_sum_ * peak_level_sum_ / |
| + (kMetricsFrameInterval * kMetricsFrameInterval) + |
| + 1e-10f) - |
| + kdBFSOffset), |
| + -90, 0, 50); |
| + |
| + RTC_DCHECK_LE(1.f, max_gain_); |
| + RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); |
| + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlMaxGain", |
| + static_cast<int>(10 * log10(max_gain_ * max_gain_)), 0, |
| + 33, 30); |
| + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControlAverageGain", |
| + static_cast<int>(10 * log10(gain_sum_ * gain_sum_ / |
| + (kMetricsFrameInterval * |
| + kMetricsFrameInterval))), |
| + 0, 33, 30); |
| + ResetEstimation(); |
| + } |
| +} |
| + |
| +LevelController::LevelController() |
| + : data_dumper_(new ApmDataDumper(instance_count_)), |
| + gain_applier_(data_dumper_.get()), |
| + signal_classifier_(data_dumper_.get()) { |
| + Initialize(AudioProcessing::kSampleRate48kHz); |
| + ++instance_count_; |
| +} |
| + |
| +LevelController::~LevelController() {} |
| + |
| +void LevelController::Initialize(int sample_rate_hz) { |
| + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate16kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate32kHz || |
| + sample_rate_hz == AudioProcessing::kSampleRate48kHz); |
| + data_dumper_->InitiateNewSetOfRecordings(); |
| + gain_selector_.Initialize(sample_rate_hz); |
| + gain_applier_.Initialize(sample_rate_hz); |
| + signal_classifier_.Initialize(sample_rate_hz); |
| + noise_level_estimator_.Initialize(sample_rate_hz); |
| + peak_level_estimator_.Initialize(); |
| + saturating_gain_estimator_.Initialize(); |
| + metrics_.Initialize(sample_rate_hz); |
| + |
| + last_gain_ = 1.0f; |
| + sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz); |
| + dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; |
| +} |
| + |
| +void LevelController::Process(AudioBuffer* audio) { |
| + RTC_DCHECK_LT(0u, audio->num_channels()); |
| + RTC_DCHECK_GE(2u, audio->num_channels()); |
| + RTC_DCHECK_NE(0.f, dc_forgetting_factor_); |
| + RTC_DCHECK(sample_rate_hz_); |
| + data_dumper_->DumpWav("lc_input", audio->num_frames(), |
| + audio->channels_const_f()[0], *sample_rate_hz_, 1); |
| + |
| + // Remove DC level. |
| + for (size_t k = 0; k < audio->num_channels(); ++k) { |
| + UpdateAndRemoveDcLevel( |
| + dc_forgetting_factor_, &dc_level_[k], |
| + rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); |
| + } |
| + |
| + SignalClassifier::SignalType signal_type; |
| + signal_classifier_.Analyze(*audio, &signal_type); |
| + int tmp = static_cast<int>(signal_type); |
| + data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); |
| + |
| + // Estimate the noise energy. |
| + float noise_energy = |
| + noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); |
| + |
| + // Estimate the overall signal peak level. |
| + float peak_level = |
| + peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio)); |
| + |
| + float saturating_gain = saturating_gain_estimator_.GetGain(); |
| + |
| + // Compute the new gain to apply. |
| + last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy, |
| + saturating_gain, signal_type); |
| + |
| + // Apply the gain to the signal. |
| + int num_saturations = gain_applier_.Process(last_gain_, audio); |
| + |
| + // Estimate the gain that saturates the overall signal. |
| + saturating_gain_estimator_.Update(last_gain_, num_saturations); |
| + |
| + // Update the metrics. |
| + metrics_.Update(peak_level, noise_energy, last_gain_); |
| + |
| + data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); |
| + data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); |
| + data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level); |
| + data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); |
| + |
| + data_dumper_->DumpWav("lc_output", audio->num_frames(), |
| + audio->channels_f()[0], *sample_rate_hz_, 1); |
| +} |
| + |
| +} // namespace webrtc |