Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1821443003: Fix normalization of noise estimate in NoiseSuppressor (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Disable noise suppressor bit-exactness unittests Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 22
23 namespace webrtc { 23 namespace webrtc {
24 24
25 namespace { 25 namespace {
26 26
27 const size_t kErbResolution = 2; 27 const size_t kErbResolution = 2;
28 const int kWindowSizeMs = 16; 28 const int kWindowSizeMs = 16;
29 const int kChunkSizeMs = 10; // Size provided by APM. 29 const int kChunkSizeMs = 10; // Size provided by APM.
30 const float kClipFreqKhz = 0.2f; 30 const float kClipFreqKhz = 0.2f;
31 const float kKbdAlpha = 1.5f; 31 const float kKbdAlpha = 1.5f;
32 const float kLambdaBot = -1.0f; // Extreme values in bisection 32 const float kLambdaBot = -1.f; // Extreme values in bisection
33 const float kLambdaTop = -1e-5f; // search for lambda. 33 const float kLambdaTop = -1e-5f; // search for lambda.
34 const float kVoiceProbabilityThreshold = 0.02f; 34 const float kVoiceProbabilityThreshold = 0.02f;
35 // Number of chunks after voice activity which is still considered speech. 35 // Number of chunks after voice activity which is still considered speech.
36 const size_t kSpeechOffsetDelay = 80; 36 const size_t kSpeechOffsetDelay = 80;
37 const float kDecayRate = 0.98f; // Power estimation decay rate. 37 const float kDecayRate = 0.98f; // Power estimation decay rate.
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.
39 const float kRho = 0.0004f; // Default production and interpretation SNR. 39 const float kRho = 0.0004f; // Default production and interpretation SNR.
40 const float kPowerNormalizationFactor = 1.f / (1 << 30);
40 41
41 // Returns dot product of vectors |a| and |b| with size |length|. 42 // Returns dot product of vectors |a| and |b| with size |length|.
42 float DotProduct(const float* a, const float* b, size_t length) { 43 float DotProduct(const float* a, const float* b, size_t length) {
43 float ret = 0.f; 44 float ret = 0.f;
44 for (size_t i = 0; i < length; ++i) { 45 for (size_t i = 0; i < length; ++i) {
45 ret += a[i] * b[i]; 46 ret += a[i] * b[i];
46 } 47 }
47 return ret; 48 return ret;
48 } 49 }
49 50
50 // Computes the power across ERB bands from the power spectral density |pow|. 51 // Computes the power across ERB bands from the power spectral density |pow|.
51 // Stores it in |result|. 52 // Stores it in |result|.
52 void MapToErbBands(const float* pow, 53 void MapToErbBands(const float* pow,
53 const std::vector<std::vector<float>>& filter_bank, 54 const std::vector<std::vector<float>>& filter_bank,
54 float* result) { 55 float* result) {
55 for (size_t i = 0; i < filter_bank.size(); ++i) { 56 for (size_t i = 0; i < filter_bank.size(); ++i) {
56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); 57 RTC_DCHECK_GT(filter_bank[i].size(), 0u);
57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); 58 result[i] = kPowerNormalizationFactor *
59 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
58 } 60 }
59 } 61 }
60 62
61 } // namespace 63 } // namespace
62 64
63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, 65 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
64 size_t num_render_channels, 66 size_t num_render_channels,
65 size_t num_noise_bins) 67 size_t num_noise_bins)
66 : freqs_(RealFourier::ComplexLength( 68 : freqs_(RealFourier::ComplexLength(
67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), 69 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 if (is_speech_) { 135 if (is_speech_) {
134 clear_power_estimator_.Step(in_block[0]); 136 clear_power_estimator_.Step(in_block[0]);
135 } 137 }
136 const std::vector<float>& clear_power = clear_power_estimator_.power(); 138 const std::vector<float>& clear_power = clear_power_estimator_.power();
137 const std::vector<float>& noise_power = noise_power_estimator_.power(); 139 const std::vector<float>& noise_power = noise_power_estimator_.power();
138 MapToErbBands(clear_power.data(), render_filter_bank_, 140 MapToErbBands(clear_power.data(), render_filter_bank_,
139 filtered_clear_pow_.data()); 141 filtered_clear_pow_.data());
140 MapToErbBands(noise_power.data(), capture_filter_bank_, 142 MapToErbBands(noise_power.data(), capture_filter_bank_,
141 filtered_noise_pow_.data()); 143 filtered_noise_pow_.data());
142 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); 144 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
143 const float power_target = 145 const float power_target = std::accumulate(
144 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); 146 filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);
145 const float power_top = 147 const float power_top =
146 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 148 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
147 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); 149 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
148 const float power_bot = 150 const float power_bot =
149 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 151 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
150 if (power_target >= power_bot && power_target <= power_top) { 152 if (power_target >= power_bot && power_target <= power_top) {
151 SolveForLambda(power_target); 153 SolveForLambda(power_target);
152 UpdateErbGains(); 154 UpdateErbGains();
153 } // Else experiencing power underflow, so do nothing. 155 } // Else experiencing power underflow, so do nothing.
154 for (size_t i = 0; i < in_channels; ++i) { 156 for (size_t i = 0; i < in_channels; ++i) {
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 308 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 309 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
308 chunks_since_voice_ = 0; 310 chunks_since_voice_ = 0;
309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 311 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
310 ++chunks_since_voice_; 312 ++chunks_since_voice_;
311 } 313 }
312 return chunks_since_voice_ < kSpeechOffsetDelay; 314 return chunks_since_voice_ < kSpeechOffsetDelay;
313 } 315 }
314 316
315 } // namespace webrtc 317 } // namespace webrtc
OLD | NEW
« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698