OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 12 matching lines...) Expand all Loading... | |
23 namespace webrtc { | 23 namespace webrtc { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 const size_t kErbResolution = 2; | 27 const size_t kErbResolution = 2; |
28 const int kWindowSizeMs = 16; | 28 const int kWindowSizeMs = 16; |
29 const int kChunkSizeMs = 10; // Size provided by APM. | 29 const int kChunkSizeMs = 10; // Size provided by APM. |
30 const float kClipFreqKhz = 0.2f; | 30 const float kClipFreqKhz = 0.2f; |
31 const float kKbdAlpha = 1.5f; | 31 const float kKbdAlpha = 1.5f; |
32 const float kLambdaBot = -1.0f; // Extreme values in bisection | 32 const float kLambdaBot = -1.0f; // Extreme values in bisection |
33 const float kLambdaTop = -10e-18f; // search for lambda. | 33 const float kLambdaTop = -1e-5; // search for lambda. |
34 const float kVoiceProbabilityThreshold = 0.02; | 34 const float kVoiceProbabilityThreshold = 0.02; |
35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
37 const float kDecayRate = 0.98f; // Power estimation decay rate. | 37 const float kDecayRate = 0.98f; // Power estimation decay rate. |
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. | 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. |
39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 39 const float kRho = 0.0004f; // Default production and interpretation SNR. |
40 | 40 |
41 // Returns dot product of vectors |a| and |b| with size |length|. | 41 // Returns dot product of vectors |a| and |b| with size |length|. |
42 float DotProduct(const float* a, const float* b, size_t length) { | 42 float DotProduct(const float* a, const float* b, size_t length) { |
43 float ret = 0.f; | 43 float ret = 0.f; |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
157 filtered_noise_pow_.get()); | 157 filtered_noise_pow_.get()); |
158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); | 158 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); |
159 const float power_target = | 159 const float power_target = |
160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f); | 160 std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f); |
161 const float power_top = | 161 const float power_top = |
162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); | 162 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); |
163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); | 163 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); |
164 const float power_bot = | 164 const float power_bot = |
165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); | 165 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); |
166 if (power_target >= power_bot && power_target <= power_top) { | 166 if (power_target >= power_bot && power_target <= power_top) { |
167 SolveForLambda(power_target, power_bot, power_top); | 167 SolveForLambda(power_target); |
168 UpdateErbGains(); | 168 UpdateErbGains(); |
169 } // Else experiencing power underflow, so do nothing. | 169 } // Else experiencing power underflow, so do nothing. |
170 gain_applier_.Apply(in_block, out_block); | 170 gain_applier_.Apply(in_block, out_block); |
171 } | 171 } |
172 | 172 |
173 void IntelligibilityEnhancer::SolveForLambda(float power_target, | 173 void IntelligibilityEnhancer::SolveForLambda(float power_target) { |
174 float power_bot, | |
175 float power_top) { | |
176 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values | 174 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values |
177 const int kMaxIters = 100; // for these, based on experiments. | 175 const int kMaxIters = 100; // for these, based on experiments. |
178 | 176 |
179 const float reciprocal_power_target = | 177 const float reciprocal_power_target = |
180 1.f / (power_target + std::numeric_limits<float>::epsilon()); | 178 1.f / (power_target + std::numeric_limits<float>::epsilon()); |
181 float lambda_bot = kLambdaBot; | 179 float lambda_bot = kLambdaBot; |
182 float lambda_top = kLambdaTop; | 180 float lambda_top = kLambdaTop; |
183 float power_ratio = 2.f; // Ratio of achieved power to target power. | 181 float power_ratio = 2.f; // Ratio of achieved power to target power. |
184 int iters = 0; | 182 int iters = 0; |
185 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { | 183 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { |
186 const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f; | 184 const float lambda = (lambda_bot + lambda_top) / 2.f; |
187 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); | 185 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); |
188 const float power = | 186 const float power = |
189 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); | 187 DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_); |
190 if (power < power_target) { | 188 if (power < power_target) { |
191 lambda_bot = lambda; | 189 lambda_bot = lambda; |
192 } else { | 190 } else { |
193 lambda_top = lambda; | 191 lambda_top = lambda; |
194 } | 192 } |
195 power_ratio = std::fabs(power * reciprocal_power_target); | 193 power_ratio = std::fabs(power * reciprocal_power_target); |
196 ++iters; | 194 ++iters; |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
281 for (size_t j = 0; j < bank_size_; ++j) { | 279 for (size_t j = 0; j < bank_size_; ++j) { |
282 filter_bank[j][i] /= sum; | 280 filter_bank[j][i] /= sum; |
283 } | 281 } |
284 } | 282 } |
285 return filter_bank; | 283 return filter_bank; |
286 } | 284 } |
287 | 285 |
288 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, | 286 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
289 size_t start_freq, | 287 size_t start_freq, |
290 float* sols) { | 288 float* sols) { |
291 bool quadratic = (kRho < 1.f); | 289 const float kMinPower = 1e-5; |
290 | |
292 const float* pow_x0 = filtered_clear_pow_.get(); | 291 const float* pow_x0 = filtered_clear_pow_.get(); |
293 const float* pow_n0 = filtered_noise_pow_.get(); | 292 const float* pow_n0 = filtered_noise_pow_.get(); |
294 | 293 |
295 for (size_t n = 0; n < start_freq; ++n) { | 294 for (size_t n = 0; n < start_freq; ++n) { |
296 sols[n] = 1.f; | 295 sols[n] = 1.f; |
297 } | 296 } |
298 | 297 |
299 // Analytic solution for optimal gains. See paper for derivation. | 298 // Analytic solution for optimal gains. See paper for derivation. |
300 for (size_t n = start_freq - 1; n < bank_size_; ++n) { | 299 for (size_t n = start_freq; n < bank_size_; ++n) { |
301 float alpha0, beta0, gamma0; | 300 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { |
302 gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + | 301 sols[n] = 1.f; |
303 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; | |
304 beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n]; | |
305 if (quadratic) { | |
306 alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n]; | |
307 sols[n] = | |
308 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / | |
309 (2 * alpha0 + std::numeric_limits<float>::epsilon()); | |
310 } else { | 302 } else { |
311 sols[n] = -gamma0 / beta0; | 303 float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + |
hlundin-webrtc
2016/02/22 12:59:22
I like local consts...
aluebs-webrtc
2016/02/22 23:56:10
I like them as well, but apparently I have a hard
| |
304 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; | |
305 float beta0 = lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; | |
306 float alpha0 = lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; | |
307 if (beta0 * beta0 < 4.f * alpha0 * gamma0) { | |
hlundin-webrtc
2016/02/22 12:59:22
You are essentially calculating beta0 * beta0 - 4.
aluebs-webrtc
2016/02/22 23:56:11
Good point, done. Although I am not creative enoug
hlundin-webrtc
2016/02/24 09:51:00
I don't know the algorithm good enough to suggest
turaj
2016/02/24 15:26:42
If you consider my suggestion of using max(0, b^2
aluebs-webrtc
2016/02/24 23:40:48
No name is best name :)
| |
308 sols[n] = -beta0 / (2.f * alpha0); | |
hlundin-webrtc
2016/02/22 12:59:22
This is not the same as the old code, right?
turaj
2016/02/22 16:05:20
My interpretation of the paper Eq 18 is that the q
aluebs-webrtc
2016/02/22 23:56:10
I agree that the quadratic equation always has rea
turaj
2016/02/24 15:26:42
Thanks for the explanation, it makes total sense,
aluebs-webrtc
2016/02/24 23:40:47
That is a great point, done.
| |
309 } else { | |
310 sols[n] = (-beta0 - sqrtf(beta0 * beta0 - 4.f * alpha0 * gamma0)) / | |
hlundin-webrtc
2016/02/22 12:59:22
No need for regularization any longer?
aluebs-webrtc
2016/02/22 23:56:10
No, because now I check for a minimum power in lin
hlundin-webrtc
2016/02/24 09:51:00
You may want to add a DCHECK to document/verify yo
aluebs-webrtc
2016/02/24 23:40:47
Done.
| |
311 (2.f * alpha0); | |
312 } | |
313 sols[n] = fmax(0.f, sols[n]); | |
312 } | 314 } |
313 sols[n] = fmax(0, sols[n]); | |
314 } | 315 } |
315 } | 316 } |
316 | 317 |
317 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { | 318 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { |
318 FloatToS16(audio, chunk_length_, &audio_s16_[0]); | 319 FloatToS16(audio, chunk_length_, &audio_s16_[0]); |
319 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); | 320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); |
320 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
321 chunks_since_voice_ = 0; | 322 chunks_since_voice_ = 0; |
322 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
323 ++chunks_since_voice_; | 324 ++chunks_since_voice_; |
324 } | 325 } |
325 return chunks_since_voice_ < kSpeechOffsetDelay; | 326 return chunks_since_voice_ < kSpeechOffsetDelay; |
326 } | 327 } |
327 | 328 |
328 } // namespace webrtc | 329 } // namespace webrtc |
OLD | NEW |