| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. |
| 36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; |
| 37 const float kDecayRate = 0.98f; // Power estimation decay rate. | 37 const float kDecayRate = 0.98f; // Power estimation decay rate. |
| 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. | 38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain. |
| 39 const float kRho = 0.0004f; // Default production and interpretation SNR. | 39 const float kRho = 0.0004f; // Default production and interpretation SNR. |
| 40 | 40 |
| 41 // Returns dot product of vectors |a| and |b| with size |length|. | 41 // Returns dot product of vectors |a| and |b| with size |length|. |
| 42 float DotProduct(const float* a, const float* b, size_t length) { | 42 float DotProduct(const float* a, const float* b, size_t length) { |
| 43 float ret = 0.f; | 43 float ret = 0.f; |
| 44 for (size_t i = 0; i < length; ++i) { | 44 for (size_t i = 0; i < length; ++i) { |
| 45 ret = fmaf(a[i], b[i], ret); | 45 ret += a[i] * b[i]; |
| 46 } | 46 } |
| 47 return ret; | 47 return ret; |
| 48 } | 48 } |
| 49 | 49 |
| 50 // Computes the power across ERB bands from the power spectral density |pow|. | 50 // Computes the power across ERB bands from the power spectral density |pow|. |
| 51 // Stores it in |result|. | 51 // Stores it in |result|. |
| 52 void MapToErbBands(const float* pow, | 52 void MapToErbBands(const float* pow, |
| 53 const std::vector<std::vector<float>>& filter_bank, | 53 const std::vector<std::vector<float>>& filter_bank, |
| 54 float* result) { | 54 float* result) { |
| 55 for (size_t i = 0; i < filter_bank.size(); ++i) { | 55 for (size_t i = 0; i < filter_bank.size(); ++i) { |
| (...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 194 ++iters; | 194 ++iters; |
| 195 } | 195 } |
| 196 } | 196 } |
| 197 | 197 |
| 198 void IntelligibilityEnhancer::UpdateErbGains() { | 198 void IntelligibilityEnhancer::UpdateErbGains() { |
| 199 // (ERB gain) = filterbank' * (freq gain) | 199 // (ERB gain) = filterbank' * (freq gain) |
| 200 float* gains = gain_applier_.target(); | 200 float* gains = gain_applier_.target(); |
| 201 for (size_t i = 0; i < freqs_; ++i) { | 201 for (size_t i = 0; i < freqs_; ++i) { |
| 202 gains[i] = 0.f; | 202 gains[i] = 0.f; |
| 203 for (size_t j = 0; j < bank_size_; ++j) { | 203 for (size_t j = 0; j < bank_size_; ++j) { |
| 204 gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]); | 204 gains[i] += render_filter_bank_[j][i] * gains_eq_[j]; |
| 205 } | 205 } |
| 206 } | 206 } |
| 207 } | 207 } |
| 208 | 208 |
| 209 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, | 209 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
| 210 size_t erb_resolution) { | 210 size_t erb_resolution) { |
| 211 float freq_limit = sample_rate / 2000.f; | 211 float freq_limit = sample_rate / 2000.f; |
| 212 size_t erb_scale = static_cast<size_t>(ceilf( | 212 size_t erb_scale = static_cast<size_t>(ceilf( |
| 213 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f)); | 213 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f)); |
| 214 return erb_scale * erb_resolution; | 214 return erb_scale * erb_resolution; |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); | 320 vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_); |
| 321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 321 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
| 322 chunks_since_voice_ = 0; | 322 chunks_since_voice_ = 0; |
| 323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 323 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
| 324 ++chunks_since_voice_; | 324 ++chunks_since_voice_; |
| 325 } | 325 } |
| 326 return chunks_since_voice_ < kSpeechOffsetDelay; | 326 return chunks_since_voice_ < kSpeechOffsetDelay; |
| 327 } | 327 } |
| 328 | 328 |
| 329 } // namespace webrtc | 329 } // namespace webrtc |
| OLD | NEW |