OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 43 matching lines...) Loading... | |
54 float* result) { | 54 float* result) { |
55 for (size_t i = 0; i < filter_bank.size(); ++i) { | 55 for (size_t i = 0; i < filter_bank.size(); ++i) { |
56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); |
58 } | 58 } |
59 } | 59 } |
60 | 60 |
61 } // namespace | 61 } // namespace |
62 | 62 |
63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
64 size_t num_render_channels) | 64 size_t num_render_channels, |
65 size_t num_noise_bins) | |
65 : freqs_(RealFourier::ComplexLength( | 66 : freqs_(RealFourier::ComplexLength( |
66 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
68 num_noise_bins_(num_noise_bins), | |
67 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), | 69 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), |
68 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), | 70 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), |
69 sample_rate_hz_(sample_rate_hz), | 71 sample_rate_hz_(sample_rate_hz), |
70 num_render_channels_(num_render_channels), | 72 num_render_channels_(num_render_channels), |
71 clear_power_estimator_(freqs_, kDecayRate), | 73 clear_power_estimator_(freqs_, kDecayRate), |
72 noise_power_estimator_( | 74 noise_power_estimator_(freqs_, kDecayRate), |
73 new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)), | |
74 filtered_clear_pow_(bank_size_, 0.f), | 75 filtered_clear_pow_(bank_size_, 0.f), |
75 filtered_noise_pow_(bank_size_, 0.f), | 76 filtered_noise_pow_(num_noise_bins, 0.f), |
76 center_freqs_(bank_size_), | 77 center_freqs_(bank_size_), |
78 capture_filter_bank_(CreateErbBank(num_noise_bins)), | |
77 render_filter_bank_(CreateErbBank(freqs_)), | 79 render_filter_bank_(CreateErbBank(freqs_)), |
78 gains_eq_(bank_size_), | 80 gains_eq_(bank_size_), |
79 gain_applier_(freqs_, kMaxRelativeGainChange), | 81 gain_applier_(freqs_, kMaxRelativeGainChange), |
80 audio_s16_(chunk_length_), | 82 audio_s16_(chunk_length_), |
81 chunks_since_voice_(kSpeechOffsetDelay), | 83 chunks_since_voice_(kSpeechOffsetDelay), |
82 is_speech_(false) { | 84 is_speech_(false), |
85 noise_estimation_buffer_(num_noise_bins) { | |
83 RTC_DCHECK_LE(kRho, 1.f); | 86 RTC_DCHECK_LE(kRho, 1.f); |
84 | 87 |
85 const size_t erb_index = static_cast<size_t>( | 88 const size_t erb_index = static_cast<size_t>( |
86 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + | 89 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + |
87 43.f)); | 90 43.f)); |
88 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); | 91 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); |
89 | 92 |
90 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_)); | 93 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_)); |
91 std::vector<float> kbd_window(window_size); | 94 std::vector<float> kbd_window(window_size); |
92 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, | 95 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, |
93 kbd_window.data()); | 96 kbd_window.data()); |
94 render_mangler_.reset(new LappedTransform( | 97 render_mangler_.reset(new LappedTransform( |
95 num_render_channels_, num_render_channels_, chunk_length_, | 98 num_render_channels_, num_render_channels_, chunk_length_, |
96 kbd_window.data(), window_size, window_size / 2, this)); | 99 kbd_window.data(), window_size, window_size / 2, this)); |
100 | |
101 std::vector<float> template_queue_element(num_noise_bins); | |
102 noise_estimation_queue_.reset( | |
the sun
2016/03/09 09:02:55
Can you make do without the scoped_ptr, given this… [comment truncated in source]
aluebs-webrtc
2016/03/09 12:18:50
Good point. Done.
| |
103 new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>( | |
104 kMaxNumNoiseEstimatesToBuffer, template_queue_element, | |
105 RenderQueueItemVerifier<float>(num_noise_bins))); | |
97 } | 106 } |
98 | 107 |
99 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( | 108 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
100 std::vector<float> noise) { | 109 std::vector<float> noise) { |
101 if (capture_filter_bank_.size() != bank_size_ || | 110 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); |
102 capture_filter_bank_[0].size() != noise.size()) { | 111 // Disregarding return value since buffer overflow is acceptable, because it |
103 capture_filter_bank_ = CreateErbBank(noise.size()); | 112 // is not critical to get each noise estimate. |
104 noise_power_estimator_.reset( | 113 if(noise_estimation_queue_->Insert(&noise)) {}; |
the sun
2016/03/09 09:02:55
Something missing here?
aluebs-webrtc
2016/03/09 12:18:50
No, that is a way of making all compilers happy an… [comment truncated in source]
the sun
2016/03/09 12:28:53
Ah, I need to learn how to read. :)
nit: space be… [comment truncated in source]
aluebs-webrtc
2016/03/09 12:58:16
Done.
| |
105 new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate)); | |
106 } | |
107 noise_power_estimator_->Step(noise.data()); | |
108 } | 114 } |
109 | 115 |
110 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 116 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
111 int sample_rate_hz, | 117 int sample_rate_hz, |
112 size_t num_channels) { | 118 size_t num_channels) { |
113 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 119 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
114 RTC_CHECK_EQ(num_render_channels_, num_channels); | 120 RTC_CHECK_EQ(num_render_channels_, num_channels); |
121 while (noise_estimation_queue_->Remove(&noise_estimation_buffer_)) { | |
122 noise_power_estimator_.Step(noise_estimation_buffer_.data()); | |
123 } | |
115 is_speech_ = IsSpeech(audio[0]); | 124 is_speech_ = IsSpeech(audio[0]); |
116 render_mangler_->ProcessChunk(audio, audio); | 125 render_mangler_->ProcessChunk(audio, audio); |
117 } | 126 } |
118 | 127 |
119 void IntelligibilityEnhancer::ProcessAudioBlock( | 128 void IntelligibilityEnhancer::ProcessAudioBlock( |
120 const std::complex<float>* const* in_block, | 129 const std::complex<float>* const* in_block, |
121 size_t in_channels, | 130 size_t in_channels, |
122 size_t frames, | 131 size_t frames, |
123 size_t /* out_channels */, | 132 size_t /* out_channels */, |
124 std::complex<float>* const* out_block) { | 133 std::complex<float>* const* out_block) { |
125 RTC_DCHECK_EQ(freqs_, frames); | 134 RTC_DCHECK_EQ(freqs_, frames); |
126 if (is_speech_) { | 135 if (is_speech_) { |
127 clear_power_estimator_.Step(in_block[0]); | 136 clear_power_estimator_.Step(in_block[0]); |
128 } | 137 } |
129 const std::vector<float>& clear_power = clear_power_estimator_.power(); | 138 const std::vector<float>& clear_power = clear_power_estimator_.power(); |
130 const std::vector<float>& noise_power = noise_power_estimator_->power(); | 139 const std::vector<float>& noise_power = noise_power_estimator_.power(); |
131 MapToErbBands(clear_power.data(), render_filter_bank_, | 140 MapToErbBands(clear_power.data(), render_filter_bank_, |
132 filtered_clear_pow_.data()); | 141 filtered_clear_pow_.data()); |
133 MapToErbBands(noise_power.data(), capture_filter_bank_, | 142 MapToErbBands(noise_power.data(), capture_filter_bank_, |
134 filtered_noise_pow_.data()); | 143 filtered_noise_pow_.data()); |
135 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 144 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
136 const float power_target = | 145 const float power_target = |
137 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); | 146 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); |
138 const float power_top = | 147 const float power_top = |
139 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 148 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
140 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 149 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); |
(...skipping 158 matching lines...) Loading... | |
299 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 308 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
300 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 309 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
301 chunks_since_voice_ = 0; | 310 chunks_since_voice_ = 0; |
302 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 311 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
303 ++chunks_since_voice_; | 312 ++chunks_since_voice_; |
304 } | 313 } |
305 return chunks_since_voice_ < kSpeechOffsetDelay; | 314 return chunks_since_voice_ < kSpeechOffsetDelay; |
306 } | 315 } |
307 | 316 |
308 } // namespace webrtc | 317 } // namespace webrtc |
OLD | NEW |