Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2104273002: Add logging to Intelligibility Enhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Log num_chunks_ when activating and deactivating Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" 11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
12 12
13 #include <math.h> 13 #include <math.h>
14 #include <stdlib.h> 14 #include <stdlib.h>
15 #include <algorithm> 15 #include <algorithm>
16 #include <limits> 16 #include <limits>
17 #include <numeric> 17 #include <numeric>
18 18
19 #include "webrtc/base/checks.h" 19 #include "webrtc/base/checks.h"
20 #include "webrtc/base/logging.h"
20 #include "webrtc/common_audio/include/audio_util.h" 21 #include "webrtc/common_audio/include/audio_util.h"
21 #include "webrtc/common_audio/window_generator.h" 22 #include "webrtc/common_audio/window_generator.h"
22 23
23 namespace webrtc { 24 namespace webrtc {
24 25
25 namespace { 26 namespace {
26 27
27 const size_t kErbResolution = 2; 28 const size_t kErbResolution = 2;
28 const int kWindowSizeMs = 16; 29 const int kWindowSizeMs = 16;
29 const int kChunkSizeMs = 10; // Size provided by APM. 30 const int kChunkSizeMs = 10; // Size provided by APM.
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
83 capture_filter_bank_(CreateErbBank(num_noise_bins)), 84 capture_filter_bank_(CreateErbBank(num_noise_bins)),
84 render_filter_bank_(CreateErbBank(freqs_)), 85 render_filter_bank_(CreateErbBank(freqs_)),
85 gains_eq_(bank_size_), 86 gains_eq_(bank_size_),
86 gain_applier_(freqs_, kMaxRelativeGainChange), 87 gain_applier_(freqs_, kMaxRelativeGainChange),
87 audio_s16_(chunk_length_), 88 audio_s16_(chunk_length_),
88 chunks_since_voice_(kSpeechOffsetDelay), 89 chunks_since_voice_(kSpeechOffsetDelay),
89 is_speech_(false), 90 is_speech_(false),
90 snr_(kMaxActiveSNR), 91 snr_(kMaxActiveSNR),
91 is_active_(false), 92 is_active_(false),
92 num_chunks_(0u), 93 num_chunks_(0u),
94 num_active_chunks_(0u),
93 noise_estimation_buffer_(num_noise_bins), 95 noise_estimation_buffer_(num_noise_bins),
94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, 96 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
95 std::vector<float>(num_noise_bins), 97 std::vector<float>(num_noise_bins),
96 RenderQueueItemVerifier<float>(num_noise_bins)) { 98 RenderQueueItemVerifier<float>(num_noise_bins)) {
97 RTC_DCHECK_LE(kRho, 1.f); 99 RTC_DCHECK_LE(kRho, 1.f);
98 100
99 const size_t erb_index = static_cast<size_t>( 101 const size_t erb_index = static_cast<size_t>(
100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + 102 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
101 43.f)); 103 43.f));
102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); 104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
103 105
104 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); 106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
105 std::vector<float> kbd_window(window_size); 107 std::vector<float> kbd_window(window_size);
106 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, 108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
107 kbd_window.data()); 109 kbd_window.data());
108 render_mangler_.reset(new LappedTransform( 110 render_mangler_.reset(new LappedTransform(
109 num_render_channels_, num_render_channels_, chunk_length_, 111 num_render_channels_, num_render_channels_, chunk_length_,
110 kbd_window.data(), window_size, window_size / 2, this)); 112 kbd_window.data(), window_size, window_size / 2, this));
111 } 113 }
112 114
115 IntelligibilityEnhancer::~IntelligibilityEnhancer() {
116 // Don't rely on this log, since the destructor isn't called when the app/tab
117 // is killed.
118 LOG(LS_INFO) << "Intelligibility Enhancer was active for "
119 << static_cast<float>(num_active_chunks_) / num_chunks_
120 << "% of the call.";
121 }
122
113 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( 123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
114 std::vector<float> noise, int gain_db) { 124 std::vector<float> noise, int gain_db) {
115 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); 125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
116 const float gain = std::pow(10.f, gain_db / 20.f); 126 const float gain = std::pow(10.f, gain_db / 20.f);
117 for (auto& bin : noise) { 127 for (auto& bin : noise) {
118 bin *= gain; 128 bin *= gain;
119 } 129 }
120 // Disregarding return value since buffer overflow is acceptable, because it 130 // Disregarding return value since buffer overflow is acceptable, because it
121 // is not critical to get each noise estimate. 131 // is not critical to get each noise estimate.
122 if (noise_estimation_queue_.Insert(&noise)) { 132 if (noise_estimation_queue_.Insert(&noise)) {
(...skipping 16 matching lines...) Expand all
139 const std::complex<float>* const* in_block, 149 const std::complex<float>* const* in_block,
140 size_t in_channels, 150 size_t in_channels,
141 size_t frames, 151 size_t frames,
142 size_t /* out_channels */, 152 size_t /* out_channels */,
143 std::complex<float>* const* out_block) { 153 std::complex<float>* const* out_block) {
144 RTC_DCHECK_EQ(freqs_, frames); 154 RTC_DCHECK_EQ(freqs_, frames);
145 if (is_speech_) { 155 if (is_speech_) {
146 clear_power_estimator_.Step(in_block[0]); 156 clear_power_estimator_.Step(in_block[0]);
147 } 157 }
148 SnrBasedEffectActivation(); 158 SnrBasedEffectActivation();
149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) { 159 ++num_chunks_;
150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, 160 if (is_active_) {
151 filtered_clear_pow_.data()); 161 ++num_active_chunks_;
152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, 162 if (num_chunks_ % kGainUpdatePeriod == 0) {
153 filtered_noise_pow_.data()); 163 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); 164 filtered_clear_pow_.data());
155 const float power_target = std::accumulate( 165 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
156 filtered_clear_pow_.data(), 166 filtered_noise_pow_.data());
157 filtered_clear_pow_.data() + bank_size_, 167 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
158 0.f); 168 const float power_target = std::accumulate(
159 const float power_top = 169 filtered_clear_pow_.data(),
160 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 170 filtered_clear_pow_.data() + bank_size_,
161 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); 171 0.f);
162 const float power_bot = 172 const float power_top =
163 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
164 if (power_target >= power_bot && power_target <= power_top) { 174 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
165 SolveForLambda(power_target); 175 const float power_bot =
166 UpdateErbGains(); 176 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
167 } // Else experiencing power underflow, so do nothing. 177 if (power_target >= power_bot && power_target <= power_top) {
178 SolveForLambda(power_target);
179 UpdateErbGains();
180 } // Else experiencing power underflow, so do nothing.
181 }
168 } 182 }
169 for (size_t i = 0; i < in_channels; ++i) { 183 for (size_t i = 0; i < in_channels; ++i) {
170 gain_applier_.Apply(in_block[i], out_block[i]); 184 gain_applier_.Apply(in_block[i], out_block[i]);
171 } 185 }
172 } 186 }
173 187
174 void IntelligibilityEnhancer::SnrBasedEffectActivation() { 188 void IntelligibilityEnhancer::SnrBasedEffectActivation() {
175 const float* clear_psd = clear_power_estimator_.power().data(); 189 const float* clear_psd = clear_power_estimator_.power().data();
176 const float* noise_psd = noise_power_estimator_.power().data(); 190 const float* noise_psd = noise_power_estimator_.power().data();
177 const float clear_power = 191 const float clear_power =
178 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); 192 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
179 const float noise_power = 193 const float noise_power =
180 std::accumulate(noise_psd, noise_psd + freqs_, 0.f); 194 std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
181 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power / 195 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /
182 (noise_power + std::numeric_limits<float>::epsilon()); 196 (noise_power + std::numeric_limits<float>::epsilon());
183 if (is_active_) { 197 if (is_active_) {
184 if (snr_ > kMaxActiveSNR) { 198 if (snr_ > kMaxActiveSNR) {
199 LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
ivoc 2016/07/01 08:04:30 This should probably say "deactivated".
aluebs-webrtc 2016/07/01 22:11:13 Good catch. Done.
200 << num_chunks_;
185 is_active_ = false; 201 is_active_ = false;
186 // Set the target gains to unity. 202 // Set the target gains to unity.
187 float* gains = gain_applier_.target(); 203 float* gains = gain_applier_.target();
188 for (size_t i = 0; i < freqs_; ++i) { 204 for (size_t i = 0; i < freqs_; ++i) {
189 gains[i] = 1.f; 205 gains[i] = 1.f;
190 } 206 }
191 } 207 }
192 } else { 208 } else {
193 is_active_ = snr_ < kMinInactiveSNR; 209 if (snr_ < kMinInactiveSNR) {
210 LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
211 << num_chunks_;
212 is_active_ = true;
213 }
194 } 214 }
195 } 215 }
196 216
197 void IntelligibilityEnhancer::SolveForLambda(float power_target) { 217 void IntelligibilityEnhancer::SolveForLambda(float power_target) {
198 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values 218 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
199 const int kMaxIters = 100; // for these, based on experiments. 219 const int kMaxIters = 100; // for these, based on experiments.
200 220
201 const float reciprocal_power_target = 221 const float reciprocal_power_target =
202 1.f / (power_target + std::numeric_limits<float>::epsilon()); 222 1.f / (power_target + std::numeric_limits<float>::epsilon());
203 float lambda_bot = kLambdaBot; 223 float lambda_bot = kLambdaBot;
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 364 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 365 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
346 chunks_since_voice_ = 0; 366 chunks_since_voice_ = 0;
347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 367 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
348 ++chunks_since_voice_; 368 ++chunks_since_voice_;
349 } 369 }
350 return chunks_since_voice_ < kSpeechOffsetDelay; 370 return chunks_since_voice_ < kSpeechOffsetDelay;
351 } 371 }
352 372
353 } // namespace webrtc 373 } // namespace webrtc
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698