OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
61 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 61 RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
62 result[i] = kPowerNormalizationFactor * | 62 result[i] = kPowerNormalizationFactor * |
63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); |
64 } | 64 } |
65 } | 65 } |
66 | 66 |
67 } // namespace | 67 } // namespace |
68 | 68 |
69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
70 size_t num_render_channels, | 70 size_t num_render_channels, |
71 size_t num_bands, | |
71 size_t num_noise_bins) | 72 size_t num_noise_bins) |
72 : freqs_(RealFourier::ComplexLength( | 73 : freqs_(RealFourier::ComplexLength( |
73 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 74 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
74 num_noise_bins_(num_noise_bins), | 75 num_noise_bins_(num_noise_bins), |
75 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), | 76 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), |
76 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), | 77 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), |
77 sample_rate_hz_(sample_rate_hz), | 78 sample_rate_hz_(sample_rate_hz), |
78 num_render_channels_(num_render_channels), | 79 num_render_channels_(num_render_channels), |
79 clear_power_estimator_(freqs_, kDecayRate), | 80 clear_power_estimator_(freqs_, kDecayRate), |
80 noise_power_estimator_(num_noise_bins, kDecayRate), | 81 noise_power_estimator_(num_noise_bins, kDecayRate), |
(...skipping 22 matching lines...) Expand all Loading... | |
103 43.f)); | 104 43.f)); |
104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); | 105 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); |
105 | 106 |
106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); | 107 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); |
107 std::vector<float> kbd_window(window_size); | 108 std::vector<float> kbd_window(window_size); |
108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, | 109 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, |
109 kbd_window.data()); | 110 kbd_window.data()); |
110 render_mangler_.reset(new LappedTransform( | 111 render_mangler_.reset(new LappedTransform( |
111 num_render_channels_, num_render_channels_, chunk_length_, | 112 num_render_channels_, num_render_channels_, chunk_length_, |
112 kbd_window.data(), window_size, window_size / 2, this)); | 113 kbd_window.data(), window_size, window_size / 2, this)); |
114 | |
115 const size_t initial_delay = render_mangler_->initial_delay(); | |
peah-webrtc
2016/09/13 13:29:59
Have you verified that this is indeed the delay in…
aluebs-webrtc
2016/09/14 00:35:54
Yes.
| |
116 for (size_t i = 0u; i < num_bands - 1; ++i) { | |
117 high_bands_buffers_.push_back( | |
118 std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer( | |
119 num_render_channels_, chunk_length_ + initial_delay))); | |
120 high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay); | |
121 } | |
113 } | 122 } |
114 | 123 |
115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { | 124 IntelligibilityEnhancer::~IntelligibilityEnhancer() { |
116 // Don't rely on this log, since the destructor isn't called when the app/tab | 125 if (num_chunks_ > 0) { |
117 // is killed. | 126 // Don't rely on this log, since the destructor isn't called when the |
118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " | 127 // app/tab is killed. |
119 << static_cast<float>(num_active_chunks_) / num_chunks_ | 128 LOG(LS_INFO) << "Intelligibility Enhancer was active for " |
peah-webrtc
2016/09/13 13:29:59
I think it would be good to have a log line for th…
aluebs-webrtc
2016/09/14 00:35:54
Done.
| |
120 << "% of the call."; | 129 << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_ |
130 << "% of the call."; | |
131 } | |
121 } | 132 } |
122 | 133 |
123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( | 134 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
124 std::vector<float> noise, float gain) { | 135 std::vector<float> noise, float gain) { |
125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); | 136 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); |
126 for (auto& bin : noise) { | 137 for (auto& bin : noise) { |
127 bin *= gain; | 138 bin *= gain; |
128 } | 139 } |
129 // Disregarding return value since buffer overflow is acceptable, because it | 140 // Disregarding return value since buffer overflow is acceptable, because it |
130 // is not critical to get each noise estimate. | 141 // is not critical to get each noise estimate. |
131 if (noise_estimation_queue_.Insert(&noise)) { | 142 if (noise_estimation_queue_.Insert(&noise)) { |
132 }; | 143 }; |
133 } | 144 } |
134 | 145 |
135 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 146 void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) { |
136 int sample_rate_hz, | 147 RTC_DCHECK_EQ(num_render_channels_, audio->num_channels()); |
137 size_t num_channels) { | |
138 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | |
139 RTC_CHECK_EQ(num_render_channels_, num_channels); | |
140 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { | 148 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { |
141 noise_power_estimator_.Step(noise_estimation_buffer_.data()); | 149 noise_power_estimator_.Step(noise_estimation_buffer_.data()); |
142 } | 150 } |
143 is_speech_ = IsSpeech(audio[0]); | 151 float* const* low_band = audio->split_channels_f(kBand0To8kHz); |
144 render_mangler_->ProcessChunk(audio, audio); | 152 is_speech_ = IsSpeech(low_band[0]); |
153 render_mangler_->ProcessChunk(low_band, low_band); | |
154 DelayHighBands(audio); | |
145 } | 155 } |
146 | 156 |
147 void IntelligibilityEnhancer::ProcessAudioBlock( | 157 void IntelligibilityEnhancer::ProcessAudioBlock( |
148 const std::complex<float>* const* in_block, | 158 const std::complex<float>* const* in_block, |
149 size_t in_channels, | 159 size_t in_channels, |
150 size_t frames, | 160 size_t frames, |
151 size_t /* out_channels */, | 161 size_t /* out_channels */, |
152 std::complex<float>* const* out_block) { | 162 std::complex<float>* const* out_block) { |
153 RTC_DCHECK_EQ(freqs_, frames); | 163 RTC_DCHECK_EQ(freqs_, frames); |
154 if (is_speech_) { | 164 if (is_speech_) { |
155 clear_power_estimator_.Step(in_block[0]); | 165 clear_power_estimator_.Step(in_block[0]); |
156 } | 166 } |
157 SnrBasedEffectActivation(); | |
158 ++num_chunks_; | |
159 if (is_active_) { | 167 if (is_active_) { |
160 ++num_active_chunks_; | 168 ++num_active_chunks_; |
161 if (num_chunks_ % kGainUpdatePeriod == 0) { | 169 if (num_chunks_ % kGainUpdatePeriod == 0) { |
162 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, | 170 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, |
163 filtered_clear_pow_.data()); | 171 filtered_clear_pow_.data()); |
164 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, | 172 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, |
165 filtered_noise_pow_.data()); | 173 filtered_noise_pow_.data()); |
166 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 174 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
167 const float power_target = std::accumulate( | 175 const float power_target = std::accumulate( |
168 filtered_clear_pow_.data(), | 176 filtered_clear_pow_.data(), |
169 filtered_clear_pow_.data() + bank_size_, | 177 filtered_clear_pow_.data() + bank_size_, |
170 0.f); | 178 0.f); |
171 const float power_top = | 179 const float power_top = |
172 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 180 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
173 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 181 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); |
174 const float power_bot = | 182 const float power_bot = |
175 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 183 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
176 if (power_target >= power_bot && power_target <= power_top) { | 184 if (power_target >= power_bot && power_target <= power_top) { |
177 SolveForLambda(power_target); | 185 SolveForLambda(power_target); |
178 UpdateErbGains(); | 186 UpdateErbGains(); |
179 } // Else experiencing power underflow, so do nothing. | 187 } // Else experiencing power underflow, so do nothing. |
180 } | 188 } |
181 } | 189 } |
190 SnrBasedEffectActivation(); | |
peah-webrtc
2016/09/13 13:29:59
I think it makes sense. But please comment in the…
aluebs-webrtc
2016/09/14 00:35:54
Moved it back. This was left from when is_active_…
| |
191 ++num_chunks_; | |
182 for (size_t i = 0; i < in_channels; ++i) { | 192 for (size_t i = 0; i < in_channels; ++i) { |
183 gain_applier_.Apply(in_block[i], out_block[i]); | 193 gain_applier_.Apply(in_block[i], out_block[i]); |
184 } | 194 } |
185 } | 195 } |
186 | 196 |
187 void IntelligibilityEnhancer::SnrBasedEffectActivation() { | 197 void IntelligibilityEnhancer::SnrBasedEffectActivation() { |
188 const float* clear_psd = clear_power_estimator_.power().data(); | 198 const float* clear_psd = clear_power_estimator_.power().data(); |
189 const float* noise_psd = noise_power_estimator_.power().data(); | 199 const float* noise_psd = noise_power_estimator_.power().data(); |
190 const float clear_power = | 200 const float clear_power = |
191 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); | 201 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); |
(...skipping 170 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
362 FloatToS16(audio, chunk_length_, audio_s16_.data()); | 372 FloatToS16(audio, chunk_length_, audio_s16_.data()); |
363 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 373 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
364 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 374 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
365 chunks_since_voice_ = 0; | 375 chunks_since_voice_ = 0; |
366 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 376 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
367 ++chunks_since_voice_; | 377 ++chunks_since_voice_; |
368 } | 378 } |
369 return chunks_since_voice_ < kSpeechOffsetDelay; | 379 return chunks_since_voice_ < kSpeechOffsetDelay; |
370 } | 380 } |
371 | 381 |
382 void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { | |
383 RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1u); | |
384 for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) { | |
385 Band band = static_cast<Band>(i + 1); | |
386 high_bands_buffers_[i]->Write(audio->split_channels_const_f(band), | |
387 num_render_channels_, chunk_length_); | |
388 high_bands_buffers_[i]->Read(audio->split_channels_f(band), | |
389 num_render_channels_, chunk_length_); | |
390 } | |
391 } | |
392 | |
372 } // namespace webrtc | 393 } // namespace webrtc |
OLD | NEW |