| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS.  All contributing project authors may | 7  *  in the file PATENTS.  All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| (...skipping 11 matching lines...) | 
| 22 | 22 | 
| 23 namespace webrtc { | 23 namespace webrtc { | 
| 24 | 24 | 
| 25 namespace { | 25 namespace { | 
| 26 | 26 | 
| 27 const size_t kErbResolution = 2; | 27 const size_t kErbResolution = 2; | 
| 28 const int kWindowSizeMs = 16; | 28 const int kWindowSizeMs = 16; | 
| 29 const int kChunkSizeMs = 10;  // Size provided by APM. | 29 const int kChunkSizeMs = 10;  // Size provided by APM. | 
| 30 const float kClipFreqKhz = 0.2f; | 30 const float kClipFreqKhz = 0.2f; | 
| 31 const float kKbdAlpha = 1.5f; | 31 const float kKbdAlpha = 1.5f; | 
| 32 const float kLambdaBot = -1.0f;      // Extreme values in bisection | 32 const float kLambdaBot = -1.f;      // Extreme values in bisection | 
| 33 const float kLambdaTop = -1e-5f;      // search for lambda. | 33 const float kLambdaTop = -1e-5f;      // search for lambda. | 
| 34 const float kVoiceProbabilityThreshold = 0.02f; | 34 const float kVoiceProbabilityThreshold = 0.02f; | 
| 35 // Number of chunks after voice activity which is still considered speech. | 35 // Number of chunks after voice activity which is still considered speech. | 
| 36 const size_t kSpeechOffsetDelay = 80; | 36 const size_t kSpeechOffsetDelay = 80; | 
| 37 const float kDecayRate = 0.98f;              // Power estimation decay rate. | 37 const float kDecayRate = 0.98f;              // Power estimation decay rate. | 
| 38 const float kMaxRelativeGainChange = 0.04f;  // Maximum relative change in gain. | 38 const float kMaxRelativeGainChange = 0.04f;  // Maximum relative change in gain. | 
| 39 const float kRho = 0.0004f;  // Default production and interpretation SNR. | 39 const float kRho = 0.0004f;  // Default production and interpretation SNR. | 
|  | 40 const float kPowerNormalizationFactor = 1.f / (1 << 30); | 
| 40 | 41 | 
| 41 // Returns dot product of vectors |a| and |b| with size |length|. | 42 // Returns dot product of vectors |a| and |b| with size |length|. | 
| 42 float DotProduct(const float* a, const float* b, size_t length) { | 43 float DotProduct(const float* a, const float* b, size_t length) { | 
| 43   float ret = 0.f; | 44   float ret = 0.f; | 
| 44   for (size_t i = 0; i < length; ++i) { | 45   for (size_t i = 0; i < length; ++i) { | 
| 45     ret += a[i] * b[i]; | 46     ret += a[i] * b[i]; | 
| 46   } | 47   } | 
| 47   return ret; | 48   return ret; | 
| 48 } | 49 } | 
| 49 | 50 | 
| 50 // Computes the power across ERB bands from the power spectral density |pow|. | 51 // Computes the power across ERB bands from the power spectral density |pow|. | 
| 51 // Stores it in |result|. | 52 // Stores it in |result|. | 
| 52 void MapToErbBands(const float* pow, | 53 void MapToErbBands(const float* pow, | 
| 53                    const std::vector<std::vector<float>>& filter_bank, | 54                    const std::vector<std::vector<float>>& filter_bank, | 
| 54                    float* result) { | 55                    float* result) { | 
| 55   for (size_t i = 0; i < filter_bank.size(); ++i) { | 56   for (size_t i = 0; i < filter_bank.size(); ++i) { | 
| 56     RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 57     RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 
| 57     result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 58     result[i] = kPowerNormalizationFactor * | 
|  | 59                 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 
| 58   } | 60   } | 
| 59 } | 61 } | 
| 60 | 62 | 
| 61 }  // namespace | 63 }  // namespace | 
| 62 | 64 | 
| 63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 65 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 
| 64                                                  size_t num_render_channels, | 66                                                  size_t num_render_channels, | 
| 65                                                  size_t num_noise_bins) | 67                                                  size_t num_noise_bins) | 
| 66     : freqs_(RealFourier::ComplexLength( | 68     : freqs_(RealFourier::ComplexLength( | 
| 67           RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 69           RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 
| (...skipping 65 matching lines...) | 
| 133   if (is_speech_) { | 135   if (is_speech_) { | 
| 134     clear_power_estimator_.Step(in_block[0]); | 136     clear_power_estimator_.Step(in_block[0]); | 
| 135   } | 137   } | 
| 136   const std::vector<float>& clear_power = clear_power_estimator_.power(); | 138   const std::vector<float>& clear_power = clear_power_estimator_.power(); | 
| 137   const std::vector<float>& noise_power = noise_power_estimator_.power(); | 139   const std::vector<float>& noise_power = noise_power_estimator_.power(); | 
| 138   MapToErbBands(clear_power.data(), render_filter_bank_, | 140   MapToErbBands(clear_power.data(), render_filter_bank_, | 
| 139                 filtered_clear_pow_.data()); | 141                 filtered_clear_pow_.data()); | 
| 140   MapToErbBands(noise_power.data(), capture_filter_bank_, | 142   MapToErbBands(noise_power.data(), capture_filter_bank_, | 
| 141                 filtered_noise_pow_.data()); | 143                 filtered_noise_pow_.data()); | 
| 142   SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 144   SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 
| 143   const float power_target = | 145   const float power_target = std::accumulate( | 
| 144       std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); | 146       filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f); | 
| 145   const float power_top = | 147   const float power_top = | 
| 146       DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 148       DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 
| 147   SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 149   SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 
| 148   const float power_bot = | 150   const float power_bot = | 
| 149       DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 151       DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 
| 150   if (power_target >= power_bot && power_target <= power_top) { | 152   if (power_target >= power_bot && power_target <= power_top) { | 
| 151     SolveForLambda(power_target); | 153     SolveForLambda(power_target); | 
| 152     UpdateErbGains(); | 154     UpdateErbGains(); | 
| 153   }  // Else experiencing power underflow, so do nothing. | 155   }  // Else experiencing power underflow, so do nothing. | 
| 154   for (size_t i = 0; i < in_channels; ++i) { | 156   for (size_t i = 0; i < in_channels; ++i) { | 
| (...skipping 151 matching lines...) | 
| 306   vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 308   vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 
| 307   if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 309   if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 
| 308     chunks_since_voice_ = 0; | 310     chunks_since_voice_ = 0; | 
| 309   } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 311   } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 
| 310     ++chunks_since_voice_; | 312     ++chunks_since_voice_; | 
| 311   } | 313   } | 
| 312   return chunks_since_voice_ < kSpeechOffsetDelay; | 314   return chunks_since_voice_ < kSpeechOffsetDelay; | 
| 313 } | 315 } | 
| 314 | 316 | 
| 315 }  // namespace webrtc | 317 }  // namespace webrtc | 
| OLD | NEW | 
|---|---|