OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... |
22 | 22 |
23 namespace webrtc { | 23 namespace webrtc { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 const size_t kErbResolution = 2; | 27 const size_t kErbResolution = 2; |
28 const int kWindowSizeMs = 16; | 28 const int kWindowSizeMs = 16; |
29 const int kChunkSizeMs = 10; // Size provided by APM. | 29 const int kChunkSizeMs = 10; // Size provided by APM. |
30 const float kClipFreqKhz = 0.2f; | 30 const float kClipFreqKhz = 0.2f; |
31 const float kKbdAlpha = 1.5f; | 31 const float kKbdAlpha = 1.5f; |
32 const float kLambdaBot = -1.0f; // Extreme values in bisection | 32 const float kLambdaBot = -1.f; // Extreme values in bisection |
33 const float kLambdaTop = -1e-5f; // search for lambda. | 33 const float kLambdaTop = -1e-5f; // search for lambda. |
34 const float kVoiceProbabilityThreshold = 0.02f; | 34 const float kVoiceProbabilityThreshold = 0.02f; |
35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
37 const float kDecayRate = 0.98f; // Power estimation decay rate. | 37 const float kDecayRate = 0.98f; // Power estimation decay rate. |
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. | 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. |
39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 39 const float kRho = 0.0004f; // Default production and interpretation SNR. |
| 40 const float kPowerNormalizationFactor = 1.f / (1 << 30); |
40 | 41 |
41 // Returns dot product of vectors |a| and |b| with size |length|. | 42 // Returns dot product of vectors |a| and |b| with size |length|. |
42 float DotProduct(const float* a, const float* b, size_t length) { | 43 float DotProduct(const float* a, const float* b, size_t length) { |
43 float ret = 0.f; | 44 float ret = 0.f; |
44 for (size_t i = 0; i < length; ++i) { | 45 for (size_t i = 0; i < length; ++i) { |
45 ret += a[i] * b[i]; | 46 ret += a[i] * b[i]; |
46 } | 47 } |
47 return ret; | 48 return ret; |
48 } | 49 } |
49 | 50 |
50 // Computes the power across ERB bands from the power spectral density |pow|. | 51 // Computes the power across ERB bands from the power spectral density |pow|. |
51 // Stores it in |result|. | 52 // Stores it in |result|. |
52 void MapToErbBands(const float* pow, | 53 void MapToErbBands(const float* pow, |
53 const std::vector<std::vector<float>>& filter_bank, | 54 const std::vector<std::vector<float>>& filter_bank, |
54 float* result) { | 55 float* result) { |
55 for (size_t i = 0; i < filter_bank.size(); ++i) { | 56 for (size_t i = 0; i < filter_bank.size(); ++i) { |
56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 57 RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 58 result[i] = kPowerNormalizationFactor * |
| 59 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); |
58 } | 60 } |
59 } | 61 } |
60 | 62 |
61 } // namespace | 63 } // namespace |
62 | 64 |
63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 65 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
64 size_t num_render_channels, | 66 size_t num_render_channels, |
65 size_t num_noise_bins) | 67 size_t num_noise_bins) |
66 : freqs_(RealFourier::ComplexLength( | 68 : freqs_(RealFourier::ComplexLength( |
67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 69 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 if (is_speech_) { | 135 if (is_speech_) { |
134 clear_power_estimator_.Step(in_block[0]); | 136 clear_power_estimator_.Step(in_block[0]); |
135 } | 137 } |
136 const std::vector<float>& clear_power = clear_power_estimator_.power(); | 138 const std::vector<float>& clear_power = clear_power_estimator_.power(); |
137 const std::vector<float>& noise_power = noise_power_estimator_.power(); | 139 const std::vector<float>& noise_power = noise_power_estimator_.power(); |
138 MapToErbBands(clear_power.data(), render_filter_bank_, | 140 MapToErbBands(clear_power.data(), render_filter_bank_, |
139 filtered_clear_pow_.data()); | 141 filtered_clear_pow_.data()); |
140 MapToErbBands(noise_power.data(), capture_filter_bank_, | 142 MapToErbBands(noise_power.data(), capture_filter_bank_, |
141 filtered_noise_pow_.data()); | 143 filtered_noise_pow_.data()); |
142 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 144 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
143 const float power_target = | 145 const float power_target = std::accumulate( |
144 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); | 146 filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f); |
145 const float power_top = | 147 const float power_top = |
146 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 148 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
147 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 149 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); |
148 const float power_bot = | 150 const float power_bot = |
149 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 151 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
150 if (power_target >= power_bot && power_target <= power_top) { | 152 if (power_target >= power_bot && power_target <= power_top) { |
151 SolveForLambda(power_target); | 153 SolveForLambda(power_target); |
152 UpdateErbGains(); | 154 UpdateErbGains(); |
153 } // Else experiencing power underflow, so do nothing. | 155 } // Else experiencing power underflow, so do nothing. |
154 for (size_t i = 0; i < in_channels; ++i) { | 156 for (size_t i = 0; i < in_channels; ++i) { |
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 308 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 309 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
308 chunks_since_voice_ = 0; | 310 chunks_since_voice_ = 0; |
309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 311 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
310 ++chunks_since_voice_; | 312 ++chunks_since_voice_; |
311 } | 313 } |
312 return chunks_since_voice_ < kSpeechOffsetDelay; | 314 return chunks_since_voice_ < kSpeechOffsetDelay; |
313 } | 315 } |
314 | 316 |
315 } // namespace webrtc | 317 } // namespace webrtc |
OLD | NEW |