OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... | |
27 const size_t kErbResolution = 2; | 27 const size_t kErbResolution = 2; |
28 const int kWindowSizeMs = 16; | 28 const int kWindowSizeMs = 16; |
29 const int kChunkSizeMs = 10; // Size provided by APM. | 29 const int kChunkSizeMs = 10; // Size provided by APM. |
30 const float kClipFreqKhz = 0.2f; | 30 const float kClipFreqKhz = 0.2f; |
31 const float kKbdAlpha = 1.5f; | 31 const float kKbdAlpha = 1.5f; |
32 const float kLambdaBot = -1.0f; // Extreme values in bisection | 32 const float kLambdaBot = -1.0f; // Extreme values in bisection |
33 const float kLambdaTop = -1e-5f; // search for lambda. | 33 const float kLambdaTop = -1e-5f; // search for lambda. |
34 const float kVoiceProbabilityThreshold = 0.02f; | 34 const float kVoiceProbabilityThreshold = 0.02f; |
35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
37 const float kDecayRate = 0.98f; // Power estimation decay rate. | 37 const float kDecayRate = 0.994f; // Power estimation decay rate. |
peah-webrtc
2016/04/01 04:43:50
I cannot really say that I yet understand the unde
hlundin-webrtc
2016/04/01 08:36:38
Acknowledged.
aluebs-webrtc
2016/04/01 17:33:35
Well, the decay rate of the PSD estimations doesn'
| |
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. | |
39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 38 const float kRho = 0.0004f; // Default production and interpretation SNR. |
40 | 39 |
41 // Returns dot product of vectors |a| and |b| with size |length|. | 40 // Returns dot product of vectors |a| and |b| with size |length|. |
42 float DotProduct(const float* a, const float* b, size_t length) { | 41 float DotProduct(const float* a, const float* b, size_t length) { |
43 float ret = 0.f; | 42 float ret = 0.f; |
44 for (size_t i = 0; i < length; ++i) { | 43 for (size_t i = 0; i < length; ++i) { |
45 ret += a[i] * b[i]; | 44 ret += a[i] * b[i]; |
46 } | 45 } |
47 return ret; | 46 return ret; |
48 } | 47 } |
(...skipping 22 matching lines...) Expand all Loading... | |
71 sample_rate_hz_(sample_rate_hz), | 70 sample_rate_hz_(sample_rate_hz), |
72 num_render_channels_(num_render_channels), | 71 num_render_channels_(num_render_channels), |
73 clear_power_estimator_(freqs_, kDecayRate), | 72 clear_power_estimator_(freqs_, kDecayRate), |
74 noise_power_estimator_(num_noise_bins, kDecayRate), | 73 noise_power_estimator_(num_noise_bins, kDecayRate), |
75 filtered_clear_pow_(bank_size_, 0.f), | 74 filtered_clear_pow_(bank_size_, 0.f), |
76 filtered_noise_pow_(num_noise_bins, 0.f), | 75 filtered_noise_pow_(num_noise_bins, 0.f), |
77 center_freqs_(bank_size_), | 76 center_freqs_(bank_size_), |
78 capture_filter_bank_(CreateErbBank(num_noise_bins)), | 77 capture_filter_bank_(CreateErbBank(num_noise_bins)), |
79 render_filter_bank_(CreateErbBank(freqs_)), | 78 render_filter_bank_(CreateErbBank(freqs_)), |
80 gains_eq_(bank_size_), | 79 gains_eq_(bank_size_), |
81 gain_applier_(freqs_, kMaxRelativeGainChange), | 80 gain_applier_(freqs_, 1.f - kDecayRate), |
82 audio_s16_(chunk_length_), | 81 audio_s16_(chunk_length_), |
83 chunks_since_voice_(kSpeechOffsetDelay), | 82 chunks_since_voice_(kSpeechOffsetDelay), |
84 is_speech_(false), | 83 is_speech_(false), |
85 noise_estimation_buffer_(num_noise_bins), | 84 noise_estimation_buffer_(num_noise_bins), |
86 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, | 85 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, |
87 std::vector<float>(num_noise_bins), | 86 std::vector<float>(num_noise_bins), |
88 RenderQueueItemVerifier<float>(num_noise_bins)) { | 87 RenderQueueItemVerifier<float>(num_noise_bins)) { |
89 RTC_DCHECK_LE(kRho, 1.f); | 88 RTC_DCHECK_LE(kRho, 1.f); |
90 | 89 |
91 const size_t erb_index = static_cast<size_t>( | 90 const size_t erb_index = static_cast<size_t>( |
(...skipping 214 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 305 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 306 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
308 chunks_since_voice_ = 0; | 307 chunks_since_voice_ = 0; |
309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 308 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
310 ++chunks_since_voice_; | 309 ++chunks_since_voice_; |
311 } | 310 } |
312 return chunks_since_voice_ < kSpeechOffsetDelay; | 311 return chunks_since_voice_ < kSpeechOffsetDelay; |
313 } | 312 } |
314 | 313 |
315 } // namespace webrtc | 314 } // namespace webrtc |
OLD | NEW |