Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 109 kbd_window.data()); | 109 kbd_window.data()); |
| 110 render_mangler_.reset(new LappedTransform( | 110 render_mangler_.reset(new LappedTransform( |
| 111 num_render_channels_, num_render_channels_, chunk_length_, | 111 num_render_channels_, num_render_channels_, chunk_length_, |
| 112 kbd_window.data(), window_size, window_size / 2, this)); | 112 kbd_window.data(), window_size, window_size / 2, this)); |
| 113 } | 113 } |
| 114 | 114 |
| 115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { | 115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { |
| 116 // Don't rely on this log, since the destructor isn't called when the app/tab | 116 // Don't rely on this log, since the destructor isn't called when the app/tab |
| 117 // is killed. | 117 // is killed. |
| 118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " | 118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " |
| 119 << static_cast<float>(num_active_chunks_) / num_chunks_ | 119 << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_ |
|
peah-webrtc
2016/09/09 09:12:16
This will cause an exception if IE is destroyed right at the start of the call, since num_chunks_ would be zero and the division would fail. [comment truncated in page capture]
aluebs-webrtc
2016/09/10 00:47:55
Done.
| |
| 120 << "% of the call."; | 120 << "% of the call."; |
| 121 } | 121 } |
| 122 | 122 |
| 123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( | 123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
| 124 std::vector<float> noise, float gain) { | 124 std::vector<float> noise, float gain) { |
| 125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); | 125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); |
| 126 for (auto& bin : noise) { | 126 for (auto& bin : noise) { |
| 127 bin *= gain; | 127 bin *= gain; |
| 128 } | 128 } |
| 129 // Disregarding return value since buffer overflow is acceptable, because it | 129 // Disregarding return value since buffer overflow is acceptable, because it |
| 130 // is not critical to get each noise estimate. | 130 // is not critical to get each noise estimate. |
| 131 if (noise_estimation_queue_.Insert(&noise)) { | 131 if (noise_estimation_queue_.Insert(&noise)) { |
| 132 }; | 132 }; |
| 133 } | 133 } |
| 134 | 134 |
| 135 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 135 void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio, |
| 136 int sample_rate_hz, | 136 int sample_rate_hz) { |
|
peah-webrtc
2016/09/09 09:12:16
Is it really necessary to pass the sample rate to this method, or could it be obtained from the AudioBuffer instead? [comment truncated in page capture]
aluebs-webrtc
2016/09/10 00:47:55
AudioBuffer doesn't have a sample_rate() method. B
| |
| 137 size_t num_channels) { | |
| 138 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 137 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
| 139 RTC_CHECK_EQ(num_render_channels_, num_channels); | 138 RTC_CHECK_EQ(num_render_channels_, audio->num_channels()); |
|
peah-webrtc
2016/09/09 09:12:16
This looks like something that there should be a DCHECK for. [comment truncated in page capture]
aluebs-webrtc
2016/09/10 00:47:55
Done.
| |
| 140 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { | 139 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { |
| 141 noise_power_estimator_.Step(noise_estimation_buffer_.data()); | 140 noise_power_estimator_.Step(noise_estimation_buffer_.data()); |
| 142 } | 141 } |
| 143 is_speech_ = IsSpeech(audio[0]); | 142 float* const* in_low_band = audio->split_channels_f(kBand0To8kHz); |
| 144 render_mangler_->ProcessChunk(audio, audio); | 143 float* const* out_low_band = is_active_ ? in_low_band : nullptr; |
| 144 is_speech_ = IsSpeech(in_low_band[0]); | |
| 145 render_mangler_->ProcessChunk(in_low_band, out_low_band); | |
|
peah-webrtc
2016/09/09 09:12:16
With this approach, you only let the audio pass th
aluebs-webrtc
2016/09/10 00:47:55
Good catch. I am not sure why I wrongly assumed th
| |
| 146 if (is_active_) { | |
|
peah-webrtc
2016/09/09 09:12:16
You cannot turn this on/off during the call as tha
aluebs-webrtc
2016/09/10 00:47:55
Same as above.
| |
| 147 DelayHighBands(audio); | |
| 148 } | |
| 145 } | 149 } |
| 146 | 150 |
| 147 void IntelligibilityEnhancer::ProcessAudioBlock( | 151 void IntelligibilityEnhancer::ProcessAudioBlock( |
| 148 const std::complex<float>* const* in_block, | 152 const std::complex<float>* const* in_block, |
| 149 size_t in_channels, | 153 size_t in_channels, |
| 150 size_t frames, | 154 size_t frames, |
| 151 size_t /* out_channels */, | 155 size_t /* out_channels */, |
| 152 std::complex<float>* const* out_block) { | 156 std::complex<float>* const* out_block) { |
| 153 RTC_DCHECK_EQ(freqs_, frames); | 157 RTC_DCHECK_EQ(freqs_, frames); |
| 154 if (is_speech_) { | 158 if (is_speech_) { |
| 155 clear_power_estimator_.Step(in_block[0]); | 159 clear_power_estimator_.Step(in_block[0]); |
| 156 } | 160 } |
| 157 SnrBasedEffectActivation(); | |
| 158 ++num_chunks_; | |
| 159 if (is_active_) { | 161 if (is_active_) { |
| 160 ++num_active_chunks_; | 162 ++num_active_chunks_; |
| 161 if (num_chunks_ % kGainUpdatePeriod == 0) { | 163 if (num_chunks_ % kGainUpdatePeriod == 0) { |
| 162 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, | 164 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, |
| 163 filtered_clear_pow_.data()); | 165 filtered_clear_pow_.data()); |
| 164 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, | 166 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, |
| 165 filtered_noise_pow_.data()); | 167 filtered_noise_pow_.data()); |
| 166 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 168 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
| 167 const float power_target = std::accumulate( | 169 const float power_target = std::accumulate( |
| 168 filtered_clear_pow_.data(), | 170 filtered_clear_pow_.data(), |
| 169 filtered_clear_pow_.data() + bank_size_, | 171 filtered_clear_pow_.data() + bank_size_, |
| 170 0.f); | 172 0.f); |
| 171 const float power_top = | 173 const float power_top = |
| 172 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 174 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
| 173 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 175 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); |
| 174 const float power_bot = | 176 const float power_bot = |
| 175 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 177 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
| 176 if (power_target >= power_bot && power_target <= power_top) { | 178 if (power_target >= power_bot && power_target <= power_top) { |
| 177 SolveForLambda(power_target); | 179 SolveForLambda(power_target); |
| 178 UpdateErbGains(); | 180 UpdateErbGains(); |
| 179 } // Else experiencing power underflow, so do nothing. | 181 } // Else experiencing power underflow, so do nothing. |
| 180 } | 182 } |
| 181 } | 183 } |
| 184 SnrBasedEffectActivation(); | |
| 185 ++num_chunks_; | |
| 182 for (size_t i = 0; i < in_channels; ++i) { | 186 for (size_t i = 0; i < in_channels; ++i) { |
| 183 gain_applier_.Apply(in_block[i], out_block[i]); | 187 gain_applier_.Apply(in_block[i], out_block[i]); |
| 184 } | 188 } |
| 185 } | 189 } |
| 186 | 190 |
| 187 void IntelligibilityEnhancer::SnrBasedEffectActivation() { | 191 void IntelligibilityEnhancer::SnrBasedEffectActivation() { |
| 188 const float* clear_psd = clear_power_estimator_.power().data(); | 192 const float* clear_psd = clear_power_estimator_.power().data(); |
| 189 const float* noise_psd = noise_power_estimator_.power().data(); | 193 const float* noise_psd = noise_power_estimator_.power().data(); |
| 190 const float clear_power = | 194 const float clear_power = |
| 191 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); | 195 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 362 FloatToS16(audio, chunk_length_, audio_s16_.data()); | 366 FloatToS16(audio, chunk_length_, audio_s16_.data()); |
| 363 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 367 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
| 364 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 368 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
| 365 chunks_since_voice_ = 0; | 369 chunks_since_voice_ = 0; |
| 366 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 370 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
| 367 ++chunks_since_voice_; | 371 ++chunks_since_voice_; |
| 368 } | 372 } |
| 369 return chunks_since_voice_ < kSpeechOffsetDelay; | 373 return chunks_since_voice_ < kSpeechOffsetDelay; |
| 370 } | 374 } |
| 371 | 375 |
| 376 void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { | |
| 377 size_t num_bands = audio->num_bands(); | |
| 378 if (num_bands != high_bands_buffers_.size() + 1u) { | |
|
peah-webrtc
2016/09/09 09:12:16
Please put the initialization of this in the constructor. [comment truncated in page capture]
aluebs-webrtc
2016/09/10 00:47:55
Good point. I thought it needed to be dynamic, but
| |
| 379 high_bands_buffers_.clear(); | |
| 380 const size_t initial_delay = render_mangler_->initial_delay(); | |
| 381 for (size_t i = 0u; i < num_bands - 1; ++i) { | |
| 382 high_bands_buffers_.push_back( | |
| 383 std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer( | |
| 384 num_render_channels_, chunk_length_ + initial_delay))); | |
| 385 high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay); | |
| 386 } | |
| 387 } | |
| 388 for (size_t i = 0u; i < num_bands - 1; ++i) { | |
| 389 Band band = static_cast<Band>(i + 1); | |
| 390 high_bands_buffers_[i]->Write(audio->split_channels_const_f(band), | |
| 391 num_render_channels_, chunk_length_); | |
| 392 high_bands_buffers_[i]->Read(audio->split_channels_f(band), | |
| 393 num_render_channels_, chunk_length_); | |
| 394 } | |
| 395 } | |
| 396 | |
| 372 } // namespace webrtc | 397 } // namespace webrtc |
| OLD | NEW |