Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)
Patch Set: Rebasing Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
61 RTC_DCHECK_GT(filter_bank[i].size(), 0u); 61 RTC_DCHECK_GT(filter_bank[i].size(), 0u);
62 result[i] = kPowerNormalizationFactor * 62 result[i] = kPowerNormalizationFactor *
63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); 63 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
64 } 64 }
65 } 65 }
66 66
67 } // namespace 67 } // namespace
68 68
69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, 69 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
70 size_t num_render_channels, 70 size_t num_render_channels,
71 size_t num_bands,
71 size_t num_noise_bins) 72 size_t num_noise_bins)
72 : freqs_(RealFourier::ComplexLength( 73 : freqs_(RealFourier::ComplexLength(
73 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), 74 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
74 num_noise_bins_(num_noise_bins), 75 num_noise_bins_(num_noise_bins),
75 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), 76 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
76 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), 77 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
77 sample_rate_hz_(sample_rate_hz), 78 sample_rate_hz_(sample_rate_hz),
78 num_render_channels_(num_render_channels), 79 num_render_channels_(num_render_channels),
79 clear_power_estimator_(freqs_, kDecayRate), 80 clear_power_estimator_(freqs_, kDecayRate),
80 noise_power_estimator_(num_noise_bins, kDecayRate), 81 noise_power_estimator_(num_noise_bins, kDecayRate),
(...skipping 22 matching lines...) Expand all
103 43.f)); 104 43.f));
104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); 105 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
105 106
106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); 107 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
107 std::vector<float> kbd_window(window_size); 108 std::vector<float> kbd_window(window_size);
108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, 109 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
109 kbd_window.data()); 110 kbd_window.data());
110 render_mangler_.reset(new LappedTransform( 111 render_mangler_.reset(new LappedTransform(
111 num_render_channels_, num_render_channels_, chunk_length_, 112 num_render_channels_, num_render_channels_, chunk_length_,
112 kbd_window.data(), window_size, window_size / 2, this)); 113 kbd_window.data(), window_size, window_size / 2, this));
114
115 const size_t initial_delay = render_mangler_->initial_delay();
116 for (size_t i = 0u; i < num_bands - 1; ++i) {
117 high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
118 new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
119 }
113 } 120 }
114 121
115 IntelligibilityEnhancer::~IntelligibilityEnhancer() { 122 IntelligibilityEnhancer::~IntelligibilityEnhancer() {
116 // Don't rely on this log, since the destructor isn't called when the app/tab 123 // Don't rely on this log, since the destructor isn't called when the
117 // is killed. 124 // app/tab is killed.
118 LOG(LS_INFO) << "Intelligibility Enhancer was active for " 125 if (num_chunks_ > 0) {
119 << static_cast<float>(num_active_chunks_) / num_chunks_ 126 LOG(LS_INFO) << "Intelligibility Enhancer was active for "
120 << "% of the call."; 127 << 100.f * static_cast<float>(num_active_chunks_) / num_chunks_
128 << "% of the call.";
129 } else {
130 LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
131 }
121 } 132 }
122 133
123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( 134 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
124 std::vector<float> noise, float gain) { 135 std::vector<float> noise, float gain) {
125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); 136 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
126 for (auto& bin : noise) { 137 for (auto& bin : noise) {
127 bin *= gain; 138 bin *= gain;
128 } 139 }
129 // Disregarding return value since buffer overflow is acceptable, because it 140 // Disregarding return value since buffer overflow is acceptable, because it
130 // is not critical to get each noise estimate. 141 // is not critical to get each noise estimate.
131 if (noise_estimation_queue_.Insert(&noise)) { 142 if (noise_estimation_queue_.Insert(&noise)) {
132 }; 143 };
133 } 144 }
134 145
135 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, 146 void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
136 int sample_rate_hz, 147 RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
137 size_t num_channels) {
138 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
139 RTC_CHECK_EQ(num_render_channels_, num_channels);
140 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { 148 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
141 noise_power_estimator_.Step(noise_estimation_buffer_.data()); 149 noise_power_estimator_.Step(noise_estimation_buffer_.data());
142 } 150 }
143 is_speech_ = IsSpeech(audio[0]); 151 float* const* low_band = audio->split_channels_f(kBand0To8kHz);
144 render_mangler_->ProcessChunk(audio, audio); 152 is_speech_ = IsSpeech(low_band[0]);
153 render_mangler_->ProcessChunk(low_band, low_band);
154 DelayHighBands(audio);
145 } 155 }
146 156
147 void IntelligibilityEnhancer::ProcessAudioBlock( 157 void IntelligibilityEnhancer::ProcessAudioBlock(
148 const std::complex<float>* const* in_block, 158 const std::complex<float>* const* in_block,
149 size_t in_channels, 159 size_t in_channels,
150 size_t frames, 160 size_t frames,
151 size_t /* out_channels */, 161 size_t /* out_channels */,
152 std::complex<float>* const* out_block) { 162 std::complex<float>* const* out_block) {
153 RTC_DCHECK_EQ(freqs_, frames); 163 RTC_DCHECK_EQ(freqs_, frames);
154 if (is_speech_) { 164 if (is_speech_) {
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 FloatToS16(audio, chunk_length_, audio_s16_.data()); 372 FloatToS16(audio, chunk_length_, audio_s16_.data());
363 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 373 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
364 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 374 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
365 chunks_since_voice_ = 0; 375 chunks_since_voice_ = 0;
366 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 376 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
367 ++chunks_since_voice_; 377 ++chunks_since_voice_;
368 } 378 }
369 return chunks_since_voice_ < kSpeechOffsetDelay; 379 return chunks_since_voice_ < kSpeechOffsetDelay;
370 } 380 }
371 381
382 void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
383 RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1u);
384 for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) {
385 Band band = static_cast<Band>(i + 1);
386 high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_);
387 }
388 }
389
372 } // namespace webrtc 390 } // namespace webrtc
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698