Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(200)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2035213002: Only update Intelligibility Enhancer gains every 10 chunks (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 22 matching lines...) Expand all
33 const float kLambdaTop = -1e-5f; // search for lambda. 33 const float kLambdaTop = -1e-5f; // search for lambda.
34 const float kVoiceProbabilityThreshold = 0.02f; 34 const float kVoiceProbabilityThreshold = 0.02f;
35 // Number of chunks after voice activity which is still considered speech. 35 // Number of chunks after voice activity which is still considered speech.
36 const size_t kSpeechOffsetDelay = 80; 36 const size_t kSpeechOffsetDelay = 80;
37 const float kDecayRate = 0.994f; // Power estimation decay rate. 37 const float kDecayRate = 0.994f; // Power estimation decay rate.
38 const float kMaxRelativeGainChange = 0.006f; 38 const float kMaxRelativeGainChange = 0.006f;
39 const float kRho = 0.0004f; // Default production and interpretation SNR. 39 const float kRho = 0.0004f; // Default production and interpretation SNR.
40 const float kPowerNormalizationFactor = 1.f / (1 << 30); 40 const float kPowerNormalizationFactor = 1.f / (1 << 30);
41 const float kMaxActiveSNR = 128.f; // 21dB 41 const float kMaxActiveSNR = 128.f; // 21dB
42 const float kMinInactiveSNR = 32.f; // 15dB 42 const float kMinInactiveSNR = 32.f; // 15dB
43 const size_t kGainUpdatePeriod = 10u;
43 44
44 // Returns dot product of vectors |a| and |b| with size |length|. 45 // Returns dot product of vectors |a| and |b| with size |length|.
45 float DotProduct(const float* a, const float* b, size_t length) { 46 float DotProduct(const float* a, const float* b, size_t length) {
46 float ret = 0.f; 47 float ret = 0.f;
47 for (size_t i = 0; i < length; ++i) { 48 for (size_t i = 0; i < length; ++i) {
48 ret += a[i] * b[i]; 49 ret += a[i] * b[i];
49 } 50 }
50 return ret; 51 return ret;
51 } 52 }
52 53
(...skipping 28 matching lines...) Expand all
81 center_freqs_(bank_size_), 82 center_freqs_(bank_size_),
82 capture_filter_bank_(CreateErbBank(num_noise_bins)), 83 capture_filter_bank_(CreateErbBank(num_noise_bins)),
83 render_filter_bank_(CreateErbBank(freqs_)), 84 render_filter_bank_(CreateErbBank(freqs_)),
84 gains_eq_(bank_size_), 85 gains_eq_(bank_size_),
85 gain_applier_(freqs_, kMaxRelativeGainChange), 86 gain_applier_(freqs_, kMaxRelativeGainChange),
86 audio_s16_(chunk_length_), 87 audio_s16_(chunk_length_),
87 chunks_since_voice_(kSpeechOffsetDelay), 88 chunks_since_voice_(kSpeechOffsetDelay),
88 is_speech_(false), 89 is_speech_(false),
89 snr_(kMaxActiveSNR), 90 snr_(kMaxActiveSNR),
90 is_active_(false), 91 is_active_(false),
92 num_chunks_(0u),
91 noise_estimation_buffer_(num_noise_bins), 93 noise_estimation_buffer_(num_noise_bins),
92 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, 94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
93 std::vector<float>(num_noise_bins), 95 std::vector<float>(num_noise_bins),
94 RenderQueueItemVerifier<float>(num_noise_bins)) { 96 RenderQueueItemVerifier<float>(num_noise_bins)) {
95 RTC_DCHECK_LE(kRho, 1.f); 97 RTC_DCHECK_LE(kRho, 1.f);
96 98
97 const size_t erb_index = static_cast<size_t>( 99 const size_t erb_index = static_cast<size_t>(
98 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + 100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
99 43.f)); 101 43.f));
100 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); 102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
137 const std::complex<float>* const* in_block, 139 const std::complex<float>* const* in_block,
138 size_t in_channels, 140 size_t in_channels,
139 size_t frames, 141 size_t frames,
140 size_t /* out_channels */, 142 size_t /* out_channels */,
141 std::complex<float>* const* out_block) { 143 std::complex<float>* const* out_block) {
142 RTC_DCHECK_EQ(freqs_, frames); 144 RTC_DCHECK_EQ(freqs_, frames);
143 if (is_speech_) { 145 if (is_speech_) {
144 clear_power_estimator_.Step(in_block[0]); 146 clear_power_estimator_.Step(in_block[0]);
145 } 147 }
146 SnrBasedEffectActivation(); 148 SnrBasedEffectActivation();
147 if (is_active_) { 149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) {
148 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, 150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
149 filtered_clear_pow_.data()); 151 filtered_clear_pow_.data());
150 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, 152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
151 filtered_noise_pow_.data()); 153 filtered_noise_pow_.data());
152 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); 154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
153 const float power_target = std::accumulate( 155 const float power_target = std::accumulate(
154 filtered_clear_pow_.data(), 156 filtered_clear_pow_.data(),
155 filtered_clear_pow_.data() + bank_size_, 157 filtered_clear_pow_.data() + bank_size_,
156 0.f); 158 0.f);
157 const float power_top = 159 const float power_top =
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
343 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
344 chunks_since_voice_ = 0; 346 chunks_since_voice_ = 0;
345 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
346 ++chunks_since_voice_; 348 ++chunks_since_voice_;
347 } 349 }
348 return chunks_since_voice_ < kSpeechOffsetDelay; 350 return chunks_since_voice_ < kSpeechOffsetDelay;
349 } 351 }
350 352
351 } // namespace webrtc 353 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698