Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(158)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1766383002: Convert IntelligibilityEnhancer to multi-threaded mode (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Formatting Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 float* result) { 54 float* result) {
55 for (size_t i = 0; i < filter_bank.size(); ++i) { 55 for (size_t i = 0; i < filter_bank.size(); ++i) {
56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); 56 RTC_DCHECK_GT(filter_bank[i].size(), 0u);
57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); 57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
58 } 58 }
59 } 59 }
60 60
61 } // namespace 61 } // namespace
62 62
63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, 63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
64 size_t num_render_channels) 64 size_t num_render_channels,
65 size_t num_noise_bins)
65 : freqs_(RealFourier::ComplexLength( 66 : freqs_(RealFourier::ComplexLength(
66 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), 67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
68 num_noise_bins_(num_noise_bins),
67 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), 69 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
68 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), 70 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
69 sample_rate_hz_(sample_rate_hz), 71 sample_rate_hz_(sample_rate_hz),
70 num_render_channels_(num_render_channels), 72 num_render_channels_(num_render_channels),
71 clear_power_estimator_(freqs_, kDecayRate), 73 clear_power_estimator_(freqs_, kDecayRate),
72 noise_power_estimator_( 74 noise_power_estimator_(num_noise_bins, kDecayRate),
73 new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),
74 filtered_clear_pow_(bank_size_, 0.f), 75 filtered_clear_pow_(bank_size_, 0.f),
75 filtered_noise_pow_(bank_size_, 0.f), 76 filtered_noise_pow_(num_noise_bins, 0.f),
76 center_freqs_(bank_size_), 77 center_freqs_(bank_size_),
78 capture_filter_bank_(CreateErbBank(num_noise_bins)),
77 render_filter_bank_(CreateErbBank(freqs_)), 79 render_filter_bank_(CreateErbBank(freqs_)),
78 gains_eq_(bank_size_), 80 gains_eq_(bank_size_),
79 gain_applier_(freqs_, kMaxRelativeGainChange), 81 gain_applier_(freqs_, kMaxRelativeGainChange),
80 audio_s16_(chunk_length_), 82 audio_s16_(chunk_length_),
81 chunks_since_voice_(kSpeechOffsetDelay), 83 chunks_since_voice_(kSpeechOffsetDelay),
82 is_speech_(false) { 84 is_speech_(false),
85 noise_estimation_buffer_(num_noise_bins),
86 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
87 std::vector<float>(num_noise_bins),
88 RenderQueueItemVerifier<float>(num_noise_bins)) {
83 RTC_DCHECK_LE(kRho, 1.f); 89 RTC_DCHECK_LE(kRho, 1.f);
84 90
85 const size_t erb_index = static_cast<size_t>( 91 const size_t erb_index = static_cast<size_t>(
86 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + 92 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
87 43.f)); 93 43.f));
88 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); 94 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
89 95
90 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_)); 96 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));
91 std::vector<float> kbd_window(window_size); 97 std::vector<float> kbd_window(window_size);
92 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, 98 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
93 kbd_window.data()); 99 kbd_window.data());
94 render_mangler_.reset(new LappedTransform( 100 render_mangler_.reset(new LappedTransform(
95 num_render_channels_, num_render_channels_, chunk_length_, 101 num_render_channels_, num_render_channels_, chunk_length_,
96 kbd_window.data(), window_size, window_size / 2, this)); 102 kbd_window.data(), window_size, window_size / 2, this));
97 } 103 }
98 104
99 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( 105 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
100 std::vector<float> noise) { 106 std::vector<float> noise) {
101 if (capture_filter_bank_.size() != bank_size_ || 107 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
102 capture_filter_bank_[0].size() != noise.size()) { 108 // Disregarding return value since buffer overflow is acceptable, because it
103 capture_filter_bank_ = CreateErbBank(noise.size()); 109 // is not critical to get each noise estimate.
104 noise_power_estimator_.reset( 110 if (noise_estimation_queue_.Insert(&noise)) {
105 new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate)); 111 };
tommi (sloooow) - chröme (2016/03/10 21:59:12): nit: remove the if() and then you can leave in the…
aluebs-webrtc (2016/03/11 09:36:48): That is a way of making all compilers happy and no…
106 }
107 noise_power_estimator_->Step(noise.data());
108 } 112 }
109 113
110 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, 114 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
111 int sample_rate_hz, 115 int sample_rate_hz,
112 size_t num_channels) { 116 size_t num_channels) {
113 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); 117 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
114 RTC_CHECK_EQ(num_render_channels_, num_channels); 118 RTC_CHECK_EQ(num_render_channels_, num_channels);
119 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
120 noise_power_estimator_.Step(noise_estimation_buffer_.data());
121 }
115 is_speech_ = IsSpeech(audio[0]); 122 is_speech_ = IsSpeech(audio[0]);
116 render_mangler_->ProcessChunk(audio, audio); 123 render_mangler_->ProcessChunk(audio, audio);
117 } 124 }
118 125
119 void IntelligibilityEnhancer::ProcessAudioBlock( 126 void IntelligibilityEnhancer::ProcessAudioBlock(
120 const std::complex<float>* const* in_block, 127 const std::complex<float>* const* in_block,
121 size_t in_channels, 128 size_t in_channels,
122 size_t frames, 129 size_t frames,
123 size_t /* out_channels */, 130 size_t /* out_channels */,
124 std::complex<float>* const* out_block) { 131 std::complex<float>* const* out_block) {
125 RTC_DCHECK_EQ(freqs_, frames); 132 RTC_DCHECK_EQ(freqs_, frames);
126 if (is_speech_) { 133 if (is_speech_) {
127 clear_power_estimator_.Step(in_block[0]); 134 clear_power_estimator_.Step(in_block[0]);
128 } 135 }
129 const std::vector<float>& clear_power = clear_power_estimator_.power(); 136 const std::vector<float>& clear_power = clear_power_estimator_.power();
130 const std::vector<float>& noise_power = noise_power_estimator_->power(); 137 const std::vector<float>& noise_power = noise_power_estimator_.power();
131 MapToErbBands(clear_power.data(), render_filter_bank_, 138 MapToErbBands(clear_power.data(), render_filter_bank_,
132 filtered_clear_pow_.data()); 139 filtered_clear_pow_.data());
133 MapToErbBands(noise_power.data(), capture_filter_bank_, 140 MapToErbBands(noise_power.data(), capture_filter_bank_,
134 filtered_noise_pow_.data()); 141 filtered_noise_pow_.data());
135 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); 142 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
136 const float power_target = 143 const float power_target =
137 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); 144 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f);
138 const float power_top = 145 const float power_top =
139 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); 146 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
140 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); 147 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); 306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
300 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { 307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
301 chunks_since_voice_ = 0; 308 chunks_since_voice_ = 0;
302 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { 309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
303 ++chunks_since_voice_; 310 ++chunks_since_voice_;
304 } 311 }
305 return chunks_since_voice_ < kSpeechOffsetDelay; 312 return chunks_since_voice_ < kSpeechOffsetDelay;
306 } 313 }
307 314
308 } // namespace webrtc 315 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698