OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
54 float* result) { | 54 float* result) { |
55 for (size_t i = 0; i < filter_bank.size(); ++i) { | 55 for (size_t i = 0; i < filter_bank.size(); ++i) { |
56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | 56 RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); | 57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); |
58 } | 58 } |
59 } | 59 } |
60 | 60 |
61 } // namespace | 61 } // namespace |
62 | 62 |
// Constructor (side-by-side view: OLD on the left, NEW on the right).
// OLD takes sample_rate_hz and num_render_channels. NEW adds num_noise_bins,
// stores it in num_noise_bins_ and uses it to size the noise-side members:
// noise_power_estimator_, filtered_noise_pow_, capture_filter_bank_,
// noise_estimation_buffer_ and the items held by noise_estimation_queue_.
// freqs_ is RealFourier::ComplexLength() of the FFT order computed for
// sample_rate_hz * kWindowSizeMs / 1000 samples; chunk_length_ is
// sample_rate_hz * kChunkSizeMs / 1000 samples.
63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, | 63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, |
64 size_t num_render_channels) | 64 size_t num_render_channels, |
65 size_t num_noise_bins) | |
65 : freqs_(RealFourier::ComplexLength( | 66 : freqs_(RealFourier::ComplexLength( |
66 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), |
68 num_noise_bins_(num_noise_bins), | |
67 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), | 69 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), |
68 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), | 70 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), |
69 sample_rate_hz_(sample_rate_hz), | 71 sample_rate_hz_(sample_rate_hz), |
70 num_render_channels_(num_render_channels), | 72 num_render_channels_(num_render_channels), |
71 clear_power_estimator_(freqs_, kDecayRate), | 73 clear_power_estimator_(freqs_, kDecayRate), |
// NEW constructs the noise estimator in place with num_noise_bins bins; OLD
// allocated it with `new` using freqs_ bins.
72 noise_power_estimator_( | 74 noise_power_estimator_(num_noise_bins, kDecayRate), |
73 new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)), | |
74 filtered_clear_pow_(bank_size_, 0.f), | 75 filtered_clear_pow_(bank_size_, 0.f), |
// NOTE(review): NEW sizes filtered_noise_pow_ with num_noise_bins where OLD
// used bank_size_. MapToErbBands() writes one result per row of the filter
// bank it is given, so this buffer needs at least
// capture_filter_bank_.size() entries - confirm against CreateErbBank().
75 filtered_noise_pow_(bank_size_, 0.f), | 76 filtered_noise_pow_(num_noise_bins, 0.f), |
76 center_freqs_(bank_size_), | 77 center_freqs_(bank_size_), |
78 capture_filter_bank_(CreateErbBank(num_noise_bins)), | |
77 render_filter_bank_(CreateErbBank(freqs_)), | 79 render_filter_bank_(CreateErbBank(freqs_)), |
78 gains_eq_(bank_size_), | 80 gains_eq_(bank_size_), |
79 gain_applier_(freqs_, kMaxRelativeGainChange), | 81 gain_applier_(freqs_, kMaxRelativeGainChange), |
80 audio_s16_(chunk_length_), | 82 audio_s16_(chunk_length_), |
81 chunks_since_voice_(kSpeechOffsetDelay), | 83 chunks_since_voice_(kSpeechOffsetDelay), |
82 is_speech_(false) { | 84 is_speech_(false), |
85 noise_estimation_buffer_(num_noise_bins), | |
86 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, | |
87 std::vector<float>(num_noise_bins), | |
88 RenderQueueItemVerifier<float>(num_noise_bins)) { | |
83 RTC_DCHECK_LE(kRho, 1.f); | 89 RTC_DCHECK_LE(kRho, 1.f); |
84 | 90 |
// start_freq_ is an ERB index derived from kClipFreqKhz, scaled by
// kErbResolution and clamped to be at least 1.
85 const size_t erb_index = static_cast<size_t>( | 91 const size_t erb_index = static_cast<size_t>( |
86 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + | 92 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + |
87 43.f)); | 93 43.f)); |
88 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); | 94 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); |
89 | 95 |
// Builds a Kaiser-Bessel-derived window of 1 << FftOrder(freqs_) samples and
// hands it to a new LappedTransform together with window_size / 2 and `this`
// (presumably the hop size and the block-processing callback - confirm
// against the LappedTransform constructor).
90 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_)); | 96 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_)); |
91 std::vector<float> kbd_window(window_size); | 97 std::vector<float> kbd_window(window_size); |
92 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, | 98 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, |
93 kbd_window.data()); | 99 kbd_window.data()); |
94 render_mangler_.reset(new LappedTransform( | 100 render_mangler_.reset(new LappedTransform( |
95 num_render_channels_, num_render_channels_, chunk_length_, | 101 num_render_channels_, num_render_channels_, chunk_length_, |
96 kbd_window.data(), window_size, window_size / 2, this)); | 102 kbd_window.data(), window_size, window_size / 2, this)); |
97 } | 103 } |
98 | 104 |
// Receives one noise estimate; the vector is taken by value.
// OLD: recreated capture_filter_bank_ and noise_power_estimator_ when the bank
// did not have bank_size_ rows or its first row's length differed from
// noise.size(), then stepped the estimator directly with `noise`.
// NEW: debug-checks that noise.size() == num_noise_bins_ and only inserts the
// vector into noise_estimation_queue_; the estimator is stepped later, when
// ProcessRenderAudio() drains that queue.
99 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( | 105 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
100 std::vector<float> noise) { | 106 std::vector<float> noise) { |
101 if (capture_filter_bank_.size() != bank_size_ || | 107 RTC_DCHECK_EQ(noise.size(), num_noise_bins_); |
102 capture_filter_bank_[0].size() != noise.size()) { | 108 // Disregarding return value since buffer overflow is acceptable, because it |
103 capture_filter_bank_ = CreateErbBank(noise.size()); | 109 // is not critical to get each noise estimate. |
// NOTE(review): on the NEW side the `if` has an empty body and exists only to
// consume Insert()'s return value; the `;` after its closing brace is a stray
// empty statement. The review thread below discusses this construct.
104 noise_power_estimator_.reset( | 110 if (noise_estimation_queue_.Insert(&noise)) { |
105 new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate)); | 111 }; |
tommi (sloooow) - chröme
2016/03/10 21:59:12
nit: remove the if() and then you can leave in the
aluebs-webrtc
2016/03/11 09:36:48
That is a way of making all compilers happy and no
| |
106 } | |
107 noise_power_estimator_->Step(noise.data()); | |
108 } | 112 } |
109 | 113 |
// Processes one chunk of render audio.
// sample_rate_hz and num_channels must equal the values given to the
// constructor (enforced with RTC_CHECK_EQ). `audio` is passed to
// render_mangler_->ProcessChunk() as both input and output, so the result
// overwrites the input buffers.
110 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 114 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
111 int sample_rate_hz, | 115 int sample_rate_hz, |
112 size_t num_channels) { | 116 size_t num_channels) { |
113 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 117 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
114 RTC_CHECK_EQ(num_render_channels_, num_channels); | 118 RTC_CHECK_EQ(num_render_channels_, num_channels); |
// NEW only: drain every noise estimate queued by SetCaptureNoiseEstimate()
// into noise_estimation_buffer_ and step the noise power estimator once per
// dequeued estimate.
119 while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { | |
120 noise_power_estimator_.Step(noise_estimation_buffer_.data()); | |
121 } | |
// The speech decision is made from the first channel only (audio[0]).
115 is_speech_ = IsSpeech(audio[0]); | 122 is_speech_ = IsSpeech(audio[0]); |
116 render_mangler_->ProcessChunk(audio, audio); | 123 render_mangler_->ProcessChunk(audio, audio); |
117 } | 124 } |
118 | 125 |
119 void IntelligibilityEnhancer::ProcessAudioBlock( | 126 void IntelligibilityEnhancer::ProcessAudioBlock( |
120 const std::complex<float>* const* in_block, | 127 const std::complex<float>* const* in_block, |
121 size_t in_channels, | 128 size_t in_channels, |
122 size_t frames, | 129 size_t frames, |
123 size_t /* out_channels */, | 130 size_t /* out_channels */, |
124 std::complex<float>* const* out_block) { | 131 std::complex<float>* const* out_block) { |
125 RTC_DCHECK_EQ(freqs_, frames); | 132 RTC_DCHECK_EQ(freqs_, frames); |
126 if (is_speech_) { | 133 if (is_speech_) { |
127 clear_power_estimator_.Step(in_block[0]); | 134 clear_power_estimator_.Step(in_block[0]); |
128 } | 135 } |
129 const std::vector<float>& clear_power = clear_power_estimator_.power(); | 136 const std::vector<float>& clear_power = clear_power_estimator_.power(); |
130 const std::vector<float>& noise_power = noise_power_estimator_->power(); | 137 const std::vector<float>& noise_power = noise_power_estimator_.power(); |
131 MapToErbBands(clear_power.data(), render_filter_bank_, | 138 MapToErbBands(clear_power.data(), render_filter_bank_, |
132 filtered_clear_pow_.data()); | 139 filtered_clear_pow_.data()); |
133 MapToErbBands(noise_power.data(), capture_filter_bank_, | 140 MapToErbBands(noise_power.data(), capture_filter_bank_, |
134 filtered_noise_pow_.data()); | 141 filtered_noise_pow_.data()); |
135 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); | 142 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); |
136 const float power_target = | 143 const float power_target = |
137 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); | 144 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f); |
138 const float power_top = | 145 const float power_top = |
139 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); | 146 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); |
140 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); | 147 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
299 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); | 306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); |
300 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { | 307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { |
301 chunks_since_voice_ = 0; | 308 chunks_since_voice_ = 0; |
302 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { | 309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) { |
303 ++chunks_since_voice_; | 310 ++chunks_since_voice_; |
304 } | 311 } |
305 return chunks_since_voice_ < kSpeechOffsetDelay; | 312 return chunks_since_voice_ < kSpeechOffsetDelay; |
306 } | 313 } |
307 | 314 |
308 } // namespace webrtc | 315 } // namespace webrtc |
OLD | NEW |