OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... |
33 const float kLambdaTop = -1e-5f; // search for lambda. | 33 const float kLambdaTop = -1e-5f; // search for lambda. |
34 const float kVoiceProbabilityThreshold = 0.02f; | 34 const float kVoiceProbabilityThreshold = 0.02f; |
35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
37 const float kDecayRate = 0.994f; // Power estimation decay rate. | 37 const float kDecayRate = 0.994f; // Power estimation decay rate. |
38 const float kMaxRelativeGainChange = 0.006f; | 38 const float kMaxRelativeGainChange = 0.006f; |
39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 39 const float kRho = 0.0004f; // Default production and interpretation SNR. |
40 const float kPowerNormalizationFactor = 1.f / (1 << 30); | 40 const float kPowerNormalizationFactor = 1.f / (1 << 30); |
41 const float kMaxActiveSNR = 128.f; // 21dB | 41 const float kMaxActiveSNR = 128.f; // 21dB |
42 const float kMinInactiveSNR = 32.f; // 15dB | 42 const float kMinInactiveSNR = 32.f; // 15dB |
| 43 const size_t kGainUpdatePeriod = 10u; |
43 | 44 |
44 // Returns the dot product of |a| and |b| over their first |length| elements, accumulated in a single float; the result is 0 when |length| is 0. | 45 // Returns the dot product of |a| and |b| over their first |length| elements, accumulated in a single float; the result is 0 when |length| is 0. |
45 float DotProduct(const float* a, const float* b, size_t length) { | 46 float DotProduct(const float* a, const float* b, size_t length) { |
46 float ret = 0.f; | 47 float ret = 0.f; |
47 for (size_t i = 0; i < length; ++i) { | 48 for (size_t i = 0; i < length; ++i) { |
48 ret += a[i] * b[i]; | 49 ret += a[i] * b[i]; |
49 } | 50 } |
50 return ret; | 51 return ret; |
51 } | 52 } |
52 | 53 |
(...skipping 28 matching lines...) Expand all Loading... |
81 center_freqs_(bank_size_), | 82 center_freqs_(bank_size_), |
82 capture_filter_bank_(CreateErbBank(num_noise_bins)), | 83 capture_filter_bank_(CreateErbBank(num_noise_bins)), |
83 render_filter_bank_(CreateErbBank(freqs_)), | 84 render_filter_bank_(CreateErbBank(freqs_)), |
84 gains_eq_(bank_size_), | 85 gains_eq_(bank_size_), |
85 gain_applier_(freqs_, kMaxRelativeGainChange), | 86 gain_applier_(freqs_, kMaxRelativeGainChange), |
86 audio_s16_(chunk_length_), | 87 audio_s16_(chunk_length_), |
87 chunks_since_voice_(kSpeechOffsetDelay), | 88 chunks_since_voice_(kSpeechOffsetDelay), |
88 is_speech_(false), | 89 is_speech_(false), |
89 snr_(kMaxActiveSNR), | 90 snr_(kMaxActiveSNR), |
90 is_active_(false), | 91 is_active_(false), |
| 92 num_chunks_(0u), |
91 noise_estimation_buffer_(num_noise_bins), | 93 noise_estimation_buffer_(num_noise_bins), |
92 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, | 94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, |
93 std::vector<float>(num_noise_bins), | 95 std::vector<float>(num_noise_bins), |
94 RenderQueueItemVerifier<float>(num_noise_bins)) { | 96 RenderQueueItemVerifier<float>(num_noise_bins)) { |
95 RTC_DCHECK_LE(kRho, 1.f); | 97 RTC_DCHECK_LE(kRho, 1.f); |
96 | 98 |
97 const size_t erb_index = static_cast<size_t>( | 99 const size_t erb_index = static_cast<size_t>( |
98 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + | 100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + |
99 43.f)); | 101 43.f)); |
100 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); | 102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
137 const std::complex<float>* const* in_block, | 139 const std::complex<float>* const* in_block, |
138 size_t in_channels, | 140 size_t in_channels, |
139 size_t frames, | 141 size_t frames, |
140 size_t /* out_channels */, | 142 size_t /* out_channels */, |
141 std::complex<float>* const* out_block) { | 143 std::complex<float>* const* out_block) { |
142 RTC_DCHECK_EQ(freqs_, frames); | 144 RTC_DCHECK_EQ(freqs_, frames); |
143 if (is_speech_) { | 145 if (is_speech_) { |
144 clear_power_estimator_.Step(in_block[0]); | 146 clear_power_estimator_.Step(in_block[0]); |
145 } | 147 } |
146 SnrBasedEffectActivation(); | 148 SnrBasedEffectActivation(); |
147 if (is_active_) { | 149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) { |
148 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, | 150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, |
149 filtered_clear_pow_.data()); | 151 filtered_clear_pow_.data()); |
150 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, | 152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, |
151 filtered_noise_pow_.data()); | 153 filtered_noise_pow_.data()); |
152 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
153 const float power_target = std::accumulate( | 155 const float power_target = std::accumulate( |
154 filtered_clear_pow_.data(), | 156 filtered_clear_pow_.data(), |
155 filtered_clear_pow_.data() + bank_size_, | 157 filtered_clear_pow_.data() + bank_size_, |
156 0.f); | 158 0.f); |
157 const float power_top = | 159 const float power_top = |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
342 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
343 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
344 chunks_since_voice_ = 0; | 346 chunks_since_voice_ = 0; |
345 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
346 ++chunks_since_voice_; | 348 ++chunks_since_voice_; |
347 } | 349 } |
348 return chunks_since_voice_ < kSpeechOffsetDelay; | 350 return chunks_since_voice_ < kSpeechOffsetDelay; |
349 } | 351 } |
350 | 352 |
351 } // namespace webrtc | 353 } // namespace webrtc |
OLD | NEW |