Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 61 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 61 RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
| 62 result[i] = kPowerNormalizationFactor * | 62 result[i] = kPowerNormalizationFactor * |
| 63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); |
| 64 } | 64 } |
| 65 } | 65 } |
| 66 | 66 |
| 67 } // namespace | 67 } // namespace |
| 68 | 68 |
| 69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
| 70 size_t num_render_channels, | 70 size_t num_render_channels, |
| 71 size_t num_bands, | |
| 71 size_t num_noise_bins) | 72 size_t num_noise_bins) |
| 72 : freqs_(RealFourier::ComplexLength( | 73 : freqs_(RealFourier::ComplexLength( |
| 73 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 74 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
| 74 num_noise_bins_(num_noise_bins), | 75 num_noise_bins_(num_noise_bins), |
| 75 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), | 76 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), |
| 76 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), | 77 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), |
| 77 sample_rate_hz_(sample_rate_hz), | 78 sample_rate_hz_(sample_rate_hz), |
| 78 num_render_channels_(num_render_channels), | 79 num_render_channels_(num_render_channels), |
| 79 clear_power_estimator_(freqs_, kDecayRate), | 80 clear_power_estimator_(freqs_, kDecayRate), |
| 80 noise_power_estimator_(num_noise_bins, kDecayRate), | 81 noise_power_estimator_(num_noise_bins, kDecayRate), |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 103 43.f)); | 104 43.f)); |
| 104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); | 105 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); |
| 105 | 106 |
| 106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); | 107 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); |
| 107 std::vector<float> kbd_window(window_size); | 108 std::vector<float> kbd_window(window_size); |
| 108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, | 109 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, |
| 109 kbd_window.data()); | 110 kbd_window.data()); |
| 110 render_mangler_.reset(new LappedTransform( | 111 render_mangler_.reset(new LappedTransform( |
| 111 num_render_channels_, num_render_channels_, chunk_length_, | 112 num_render_channels_, num_render_channels_, chunk_length_, |
| 112 kbd_window.data(), window_size, window_size / 2, this)); | 113 kbd_window.data(), window_size, window_size / 2, this)); |
| 114 | |
| 115 const size_t initial_delay = render_mangler_->initial_delay(); | |
| 116 for (size_t i = 0u; i < num_bands - 1; ++i) { | |
| 117 high_bands_buffers_.push_back( | |
| 118 std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer( | |
| 119 num_render_channels_, chunk_length_ + initial_delay))); | |
|
peah-webrtc
2016/09/15 15:06:20
It is a bit wasteful to throw up 2 AudioRingBuffer
aluebs-webrtc
2016/09/15 23:45:25
Good point. Implemented DelayBuffer helper class.
| |
| 120 high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay); | |
| 121 } | |
| 113 } | 122 } |
| 114 | 123 |
| 115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { | 124 IntelligibilityEnhancer::~IntelligibilityEnhancer() { |
| 116 // Don't rely on this log, since the destructor isn't called when the app/tab | 125 // Don't rely on this log, since the destructor isn't called when the |
| 117 // is killed. | 126 // app/tab is killed. |
| 118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " | 127 if (num_chunks_ > 0) { |
| 119 << static_cast<float>(num_active_chunks_) / num_chunks_ | 128 LOG(LS_INFO) << "Intelligibility Enhancer was active for " |
| 120 << "% of the call."; | 129 << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_ |
| 130 << "% of the call."; | |
| 131 } else { | |
| 132 LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk."; | |
| 133 } | |
| 121 } | 134 } |
| 122 | 135 |
| 123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( | 136 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
| 124 std::vector<float> noise, float gain) { | 137 std::vector<float> noise, float gain) { |
| 125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); | 138 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); |
| 126 for (auto& bin : noise) { | 139 for (auto& bin : noise) { |
| 127 bin *= gain; | 140 bin *= gain; |
| 128 } | 141 } |
| 129 // Disregarding return value since buffer overflow is acceptable, because it | 142 // Disregarding return value since buffer overflow is acceptable, because it |
| 130 // is not critical to get each noise estimate. | 143 // is not critical to get each noise estimate. |
| 131 if (noise_estimation_queue_.Insert(&noise)) { | 144 if (noise_estimation_queue_.Insert(&noise)) { |
| 132 }; | 145 }; |
| 133 } | 146 } |
| 134 | 147 |
| 135 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 148 void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) { |
| 136 int sample_rate_hz, | 149 RTC_DCHECK_EQ(num_render_channels_, audio->num_channels()); |
| 137 size_t num_channels) { | |
| 138 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | |
| 139 RTC_CHECK_EQ(num_render_channels_, num_channels); | |
| 140 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { | 150 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { |
| 141 noise_power_estimator_.Step(noise_estimation_buffer_.data()); | 151 noise_power_estimator_.Step(noise_estimation_buffer_.data()); |
| 142 } | 152 } |
| 143 is_speech_ = IsSpeech(audio[0]); | 153 float* const* low_band = audio->split_channels_f(kBand0To8kHz); |
| 144 render_mangler_->ProcessChunk(audio, audio); | 154 is_speech_ = IsSpeech(low_band[0]); |
| 155 render_mangler_->ProcessChunk(low_band, low_band); | |
| 156 DelayHighBands(audio); | |
| 145 } | 157 } |
| 146 | 158 |
| 147 void IntelligibilityEnhancer::ProcessAudioBlock( | 159 void IntelligibilityEnhancer::ProcessAudioBlock( |
| 148 const std::complex<float>* const* in_block, | 160 const std::complex<float>* const* in_block, |
| 149 size_t in_channels, | 161 size_t in_channels, |
| 150 size_t frames, | 162 size_t frames, |
| 151 size_t /* out_channels */, | 163 size_t /* out_channels */, |
| 152 std::complex<float>* const* out_block) { | 164 std::complex<float>* const* out_block) { |
| 153 RTC_DCHECK_EQ(freqs_, frames); | 165 RTC_DCHECK_EQ(freqs_, frames); |
| 154 if (is_speech_) { | 166 if (is_speech_) { |
| (...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 362 FloatToS16(audio, chunk_length_, audio_s16_.data()); | 374 FloatToS16(audio, chunk_length_, audio_s16_.data()); |
| 363 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 375 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
| 364 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 376 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
| 365 chunks_since_voice_ = 0; | 377 chunks_since_voice_ = 0; |
| 366 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 378 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
| 367 ++chunks_since_voice_; | 379 ++chunks_since_voice_; |
| 368 } | 380 } |
| 369 return chunks_since_voice_ < kSpeechOffsetDelay; | 381 return chunks_since_voice_ < kSpeechOffsetDelay; |
| 370 } | 382 } |
| 371 | 383 |
| 384 void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { | |
| 385 RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1u); | |
|
peah-webrtc
2016/09/15 15:06:20
Suggestion: Since in this case high_bands_buffers_
aluebs-webrtc
2016/09/15 23:45:25
That is not scalable if we ever decide to add supp
| |
| 386 for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) { | |
| 387 Band band = static_cast<Band>(i + 1); | |
| 388 high_bands_buffers_[i]->Write(audio->split_channels_const_f(band), | |
| 389 num_render_channels_, chunk_length_); | |
| 390 high_bands_buffers_[i]->Read(audio->split_channels_f(band), | |
| 391 num_render_channels_, chunk_length_); | |
| 392 } | |
| 393 } | |
| 394 | |
| 372 } // namespace webrtc | 395 } // namespace webrtc |
| OLD | NEW |