Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 kbd_window.data()); 109 kbd_window.data());
110 render_mangler_.reset(new LappedTransform( 110 render_mangler_.reset(new LappedTransform(
111 num_render_channels_, num_render_channels_, chunk_length_, 111 num_render_channels_, num_render_channels_, chunk_length_,
112 kbd_window.data(), window_size, window_size / 2, this)); 112 kbd_window.data(), window_size, window_size / 2, this));
113 } 113 }
114 114
115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { 115 IntelligibilityEnhancer::~IntelligibilityEnhancer() {
116 // Don't rely on this log, since the destructor isn't called when the app/tab 116 // Don't rely on this log, since the destructor isn't called when the app/tab
117 // is killed. 117 // is killed.
118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " 118 LOG(LS_INFO) << "Intelligibility Enhancer was active for "
119 << static_cast<float>(num_active_chunks_) / num_chunks_ 119 << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_
peah-webrtc 2016/09/09 09:12:16 This will cause an exception if IE is destroyed ri…
aluebs-webrtc 2016/09/10 00:47:55 Done.
120 << "% of the call."; 120 << "% of the call.";
121 } 121 }
122 122
123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( 123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
124 std::vector<float> noise, float gain) { 124 std::vector<float> noise, float gain) {
125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); 125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
126 for (auto& bin : noise) { 126 for (auto& bin : noise) {
127 bin *= gain; 127 bin *= gain;
128 } 128 }
129 // Disregarding return value since buffer overflow is acceptable, because it 129 // Disregarding return value since buffer overflow is acceptable, because it
130 // is not critical to get each noise estimate. 130 // is not critical to get each noise estimate.
131 if (noise_estimation_queue_.Insert(&noise)) { 131 if (noise_estimation_queue_.Insert(&noise)) {
132 }; 132 };
133 } 133 }
134 134
135 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, 135 void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio,
136 int sample_rate_hz, 136 int sample_rate_hz) {
peah-webrtc 2016/09/09 09:12:16 Is it really necessary to pass the sample rate to…
aluebs-webrtc 2016/09/10 00:47:55 AudioBuffer doesn't have a sample_rate() method. B…
137 size_t num_channels) {
138 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); 137 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
139 RTC_CHECK_EQ(num_render_channels_, num_channels); 138 RTC_CHECK_EQ(num_render_channels_, audio->num_channels());
peah-webrtc 2016/09/09 09:12:16 This looks like something that there should be a D…
aluebs-webrtc 2016/09/10 00:47:55 Done.
140 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { 139 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
141 noise_power_estimator_.Step(noise_estimation_buffer_.data()); 140 noise_power_estimator_.Step(noise_estimation_buffer_.data());
142 } 141 }
143 is_speech_ = IsSpeech(audio[0]); 142 float* const* in_low_band = audio->split_channels_f(kBand0To8kHz);
144 render_mangler_->ProcessChunk(audio, audio); 143 float* const* out_low_band = is_active_ ? in_low_band : nullptr;
144 is_speech_ = IsSpeech(in_low_band[0]);
145 render_mangler_->ProcessChunk(in_low_band, out_low_band);
peah-webrtc 2016/09/09 09:12:16 With this approach, you only let the audio pass th…
aluebs-webrtc 2016/09/10 00:47:55 Good catch. I am not sure why I wrongly assumed th…
146 if (is_active_) {
peah-webrtc 2016/09/09 09:12:16 You cannot turn this on/off during the call as tha…
aluebs-webrtc 2016/09/10 00:47:55 Same as above.
147 DelayHighBands(audio);
148 }
145 } 149 }
146 150
147 void IntelligibilityEnhancer::ProcessAudioBlock( 151 void IntelligibilityEnhancer::ProcessAudioBlock(
148 const std::complex<float>* const* in_block, 152 const std::complex<float>* const* in_block,
149 size_t in_channels, 153 size_t in_channels,
150 size_t frames, 154 size_t frames,
151 size_t /* out_channels */, 155 size_t /* out_channels */,
152 std::complex<float>* const* out_block) { 156 std::complex<float>* const* out_block) {
153 RTC_DCHECK_EQ(freqs_, frames); 157 RTC_DCHECK_EQ(freqs_, frames);
154 if (is_speech_) { 158 if (is_speech_) {
155 clear_power_estimator_.Step(in_block[0]); 159 clear_power_estimator_.Step(in_block[0]);
156 } 160 }
157 SnrBasedEffectActivation();
158 ++num_chunks_;
159 if (is_active_) { 161 if (is_active_) {
160 ++num_active_chunks_; 162 ++num_active_chunks_;
161 if (num_chunks_ % kGainUpdatePeriod == 0) { 163 if (num_chunks_ % kGainUpdatePeriod == 0) {
162 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, 164 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
163 filtered_clear_pow_.data()); 165 filtered_clear_pow_.data());
164 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, 166 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
165 filtered_noise_pow_.data()); 167 filtered_noise_pow_.data());
166 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); 168 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
167 const float power_target = std::accumulate( 169 const float power_target = std::accumulate(
168 filtered_clear_pow_.data(), 170 filtered_clear_pow_.data(),
169 filtered_clear_pow_.data() + bank_size_, 171 filtered_clear_pow_.data() + bank_size_,
170 0.f); 172 0.f);
171 const float power_top = 173 const float power_top =
172 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 174 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
173 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); 175 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
174 const float power_bot = 176 const float power_bot =
175 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 177 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
176 if (power_target >= power_bot && power_target <= power_top) { 178 if (power_target >= power_bot && power_target <= power_top) {
177 SolveForLambda(power_target); 179 SolveForLambda(power_target);
178 UpdateErbGains(); 180 UpdateErbGains();
179 } // Else experiencing power underflow, so do nothing. 181 } // Else experiencing power underflow, so do nothing.
180 } 182 }
181 } 183 }
184 SnrBasedEffectActivation();
185 ++num_chunks_;
182 for (size_t i = 0; i < in_channels; ++i) { 186 for (size_t i = 0; i < in_channels; ++i) {
183 gain_applier_.Apply(in_block[i], out_block[i]); 187 gain_applier_.Apply(in_block[i], out_block[i]);
184 } 188 }
185 } 189 }
186 190
187 void IntelligibilityEnhancer::SnrBasedEffectActivation() { 191 void IntelligibilityEnhancer::SnrBasedEffectActivation() {
188 const float* clear_psd = clear_power_estimator_.power().data(); 192 const float* clear_psd = clear_power_estimator_.power().data();
189 const float* noise_psd = noise_power_estimator_.power().data(); 193 const float* noise_psd = noise_power_estimator_.power().data();
190 const float clear_power = 194 const float clear_power =
191 std::accumulate(clear_psd, clear_psd + freqs_, 0.f); 195 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 FloatToS16(audio, chunk_length_, audio_s16_.data()); 366 FloatToS16(audio, chunk_length_, audio_s16_.data());
363 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 367 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
364 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 368 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
365 chunks_since_voice_ = 0; 369 chunks_since_voice_ = 0;
366 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 370 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
367 ++chunks_since_voice_; 371 ++chunks_since_voice_;
368 } 372 }
369 return chunks_since_voice_ < kSpeechOffsetDelay; 373 return chunks_since_voice_ < kSpeechOffsetDelay;
370 } 374 }
371 375
376 void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
377 size_t num_bands = audio->num_bands();
378 if (num_bands != high_bands_buffers_.size() + 1u) {
peah-webrtc 2016/09/09 09:12:16 Please put the initialization of this in the const…
aluebs-webrtc 2016/09/10 00:47:55 Good point. I thought it needed to be dynamic, but…
379 high_bands_buffers_.clear();
380 const size_t initial_delay = render_mangler_->initial_delay();
381 for (size_t i = 0u; i < num_bands - 1; ++i) {
382 high_bands_buffers_.push_back(
383 std::unique_ptr<AudioRingBuffer>(new AudioRingBuffer(
384 num_render_channels_, chunk_length_ + initial_delay)));
385 high_bands_buffers_[i]->MoveReadPositionBackward(initial_delay);
386 }
387 }
388 for (size_t i = 0u; i < num_bands - 1; ++i) {
389 Band band = static_cast<Band>(i + 1);
390 high_bands_buffers_[i]->Write(audio->split_channels_const_f(band),
391 num_render_channels_, chunk_length_);
392 high_bands_buffers_[i]->Read(audio->split_channels_f(band),
393 num_render_channels_, chunk_length_);
394 }
395 }
396
372 } // namespace webrtc 397 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698