OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... | |
22 | 22 |
23 namespace webrtc { | 23 namespace webrtc { |
24 | 24 |
25 namespace { | 25 namespace { |
26 | 26 |
27 const size_t kErbResolution = 2; | 27 const size_t kErbResolution = 2; |
28 const int kWindowSizeMs = 16; | 28 const int kWindowSizeMs = 16; |
29 const int kChunkSizeMs = 10; // Size provided by APM. | 29 const int kChunkSizeMs = 10; // Size provided by APM. |
30 const float kClipFreqKhz = 0.2f; | 30 const float kClipFreqKhz = 0.2f; |
31 const float kKbdAlpha = 1.5f; | 31 const float kKbdAlpha = 1.5f; |
32 const float kLambdaBot = -1.0f; // Extreme values in bisection | 32 const double kLambdaBot = -1.0 / (1 << 30); // Extreme values in bisection |
turaj
2016/03/30 14:51:55
This is a relatively large change which scales the
aluebs-webrtc
2016/03/31 00:26:32
Yes, it needed to be normalized by the new noise s
| |
33 const float kLambdaTop = -1e-5f; // search for lambda. | 33 const double kLambdaTop = -1e-5 / (1 << 30); // search for lambda. |
34 const float kVoiceProbabilityThreshold = 0.02f; | 34 const float kVoiceProbabilityThreshold = 0.02f; |
35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
37 const float kDecayRate = 0.98f; // Power estimation decay rate. | 37 const float kDecayRate = 0.98f; // Power estimation decay rate. |
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. | 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. |
39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 39 const float kRho = 0.0004f; // Default production and interpretation SNR. |
40 | 40 |
41 // Returns dot product of vectors |a| and |b| with size |length|. | 41 // Returns dot product of vectors |a| and |b| with size |length|. |
42 float DotProduct(const float* a, const float* b, size_t length) { | 42 float DotProduct(const float* a, const float* b, size_t length) { |
43 float ret = 0.f; | 43 float ret = 0.f; |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
155 gain_applier_.Apply(in_block[i], out_block[i]); | 155 gain_applier_.Apply(in_block[i], out_block[i]); |
156 } | 156 } |
157 } | 157 } |
158 | 158 |
159 void IntelligibilityEnhancer::SolveForLambda(float power_target) { | 159 void IntelligibilityEnhancer::SolveForLambda(float power_target) { |
160 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values | 160 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values |
161 const int kMaxIters = 100; // for these, based on experiments. | 161 const int kMaxIters = 100; // for these, based on experiments. |
162 | 162 |
163 const float reciprocal_power_target = | 163 const float reciprocal_power_target = |
164 1.f / (power_target + std::numeric_limits<float>::epsilon()); | 164 1.f / (power_target + std::numeric_limits<float>::epsilon()); |
165 float lambda_bot = kLambdaBot; | 165 double lambda_bot = kLambdaBot; |
166 float lambda_top = kLambdaTop; | 166 double lambda_top = kLambdaTop; |
167 float power_ratio = 2.f; // Ratio of achieved power to target power. | 167 float power_ratio = 2.f; // Ratio of achieved power to target power. |
168 int iters = 0; | 168 int iters = 0; |
169 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { | 169 while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { |
170 const float lambda = (lambda_bot + lambda_top) / 2.f; | 170 const double lambda = (lambda_bot + lambda_top) / 2.0; |
peah-webrtc
2016/03/30 13:44:45
lambda is always in between lambda_bot and lambda_t
aluebs-webrtc
2016/03/31 00:26:32
Good point, although I was trying to keep consiste
| |
171 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); | 171 SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); |
172 const float power = | 172 const float power = |
173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
174 if (power < power_target) { | 174 if (power < power_target) { |
175 lambda_bot = lambda; | 175 lambda_bot = lambda; |
176 } else { | 176 } else { |
177 lambda_top = lambda; | 177 lambda_top = lambda; |
178 } | 178 } |
179 power_ratio = std::fabs(power * reciprocal_power_target); | 179 power_ratio = std::fabs(power * reciprocal_power_target); |
180 ++iters; | 180 ++iters; |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
260 for (size_t j = 0; j < bank_size_; ++j) { | 260 for (size_t j = 0; j < bank_size_; ++j) { |
261 sum += filter_bank[j][i]; | 261 sum += filter_bank[j][i]; |
262 } | 262 } |
263 for (size_t j = 0; j < bank_size_; ++j) { | 263 for (size_t j = 0; j < bank_size_; ++j) { |
264 filter_bank[j][i] /= sum; | 264 filter_bank[j][i] /= sum; |
265 } | 265 } |
266 } | 266 } |
267 return filter_bank; | 267 return filter_bank; |
268 } | 268 } |
269 | 269 |
270 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, | 270 void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda, |
271 size_t start_freq, | 271 size_t start_freq, |
272 float* sols) { | 272 float* sols) { |
273 const float kMinPower = 1e-5f; | 273 const float kMinPower = 1e-5f; |
274 | 274 |
275 const float* pow_x0 = filtered_clear_pow_.data(); | 275 const float* pow_x0 = filtered_clear_pow_.data(); |
276 const float* pow_n0 = filtered_noise_pow_.data(); | 276 const float* pow_n0 = filtered_noise_pow_.data(); |
277 | 277 |
278 for (size_t n = 0; n < start_freq; ++n) { | 278 for (size_t n = 0; n < start_freq; ++n) { |
279 sols[n] = 1.f; | 279 sols[n] = 1.f; |
280 } | 280 } |
281 | 281 |
282 // Analytic solution for optimal gains. See paper for derivation. | 282 // Analytic solution for optimal gains. See paper for derivation. |
283 for (size_t n = start_freq; n < bank_size_; ++n) { | 283 for (size_t n = start_freq; n < bank_size_; ++n) { |
284 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { | 284 if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { |
285 sols[n] = 1.f; | 285 sols[n] = 1.f; |
286 } else { | 286 } else { |
287 const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + | 287 const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] + |
peah-webrtc
2016/03/30 13:44:46
I cannot see from these equations what difference t
aluebs-webrtc
2016/03/31 00:26:32
Basically because each alpha0, beta0 or gamma0 has
| |
288 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; | 288 lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; |
289 const float beta0 = | 289 const double beta0 = |
290 lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; | 290 lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n]; |
291 const float alpha0 = | 291 const double alpha0 = |
292 lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; | 292 lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n]; |
293 RTC_DCHECK_LT(alpha0, 0.f); | 293 RTC_DCHECK_LT(alpha0, 0.0); |
294 // The quadratic equation should always have real roots, but to guard | 294 // The quadratic equation should always have real roots, but to guard |
295 // against numerical errors we limit it to a minimum of zero. | 295 // against numerical errors we limit it to a minimum of zero. |
296 sols[n] = std::max( | 296 sols[n] = std::max( |
297 0.f, (-beta0 - std::sqrt(std::max( | 297 0.0, (-beta0 - std::sqrt(std::max( |
298 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) / | 298 0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) / |
299 (2.f * alpha0)); | 299 (2.0 * alpha0)); |
300 } | 300 } |
301 } | 301 } |
302 } | 302 } |
303 | 303 |
304 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { | 304 bool IntelligibilityEnhancer::IsSpeech(const float* audio) { |
305 FloatToS16(audio, chunk_length_, audio_s16_.data()); | 305 FloatToS16(audio, chunk_length_, audio_s16_.data()); |
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
308 chunks_since_voice_ = 0; | 308 chunks_since_voice_ = 0; |
309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
310 ++chunks_since_voice_; | 310 ++chunks_since_voice_; |
311 } | 311 } |
312 return chunks_since_voice_ < kSpeechOffsetDelay; | 312 return chunks_since_voice_ < kSpeechOffsetDelay; |
313 } | 313 } |
314 | 314 |
315 } // namespace webrtc | 315 } // namespace webrtc |
OLD | NEW |