Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 32 | 32 |
| 33 const size_t kErbResolution = 2; | 33 const size_t kErbResolution = 2; |
| 34 const int kWindowSizeMs = 2; | 34 const int kWindowSizeMs = 2; |
| 35 const int kChunkSizeMs = 10; // Size provided by APM. | 35 const int kChunkSizeMs = 10; // Size provided by APM. |
| 36 const float kClipFreq = 200.0f; | 36 const float kClipFreq = 200.0f; |
| 37 const float kConfigRho = 0.02f; // Default production and interpretation SNR. | 37 const float kConfigRho = 0.02f; // Default production and interpretation SNR. |
| 38 const float kKbdAlpha = 1.5f; | 38 const float kKbdAlpha = 1.5f; |
| 39 const float kLambdaBot = -1.0f; // Extreme values in bisection | 39 const float kLambdaBot = -1.0f; // Extreme values in bisection |
| 40 const float kLambdaTop = -10e-18f; // search for lambda. | 40 const float kLambdaTop = -10e-18f; // search for lambda. |
| 41 | 41 |
| 42 // Returns dot product of vectors |a| and |b| with size |length|. | |
| 43 float DotProduct(const float* a, const float* b, size_t length) { | |
| 44 float ret = 0.f; | |
| 45 for (size_t i = 0; i < length; ++i) { | |
| 46 ret = fmaf(a[i], b[i], ret); | |
| 47 } | |
| 48 return ret; | |
| 49 } | |
| 50 | |
| 51 // Computes the power across ERB filters from the power spectral density |var|. | |
| 52 // Stores it in |result|. | |
| 53 void FilterVariance(const float* var, | |
| 54 const std::vector<std::vector<float>>& filter_bank, | |
| 55 float* result) { | |
| 56 for (size_t i = 0; i < filter_bank.size(); ++i) { | |
| 57 RTC_DCHECK_GT(filter_bank[i].size(), 0u); | |
| 58 result[i] = DotProduct(&filter_bank[i][0], var, filter_bank[i].size()); | |
| 59 } | |
| 60 } | |
| 61 | |
| 42 } // namespace | 62 } // namespace |
| 43 | 63 |
| 44 using std::complex; | 64 using std::complex; |
| 45 using std::max; | 65 using std::max; |
| 46 using std::min; | 66 using std::min; |
| 47 using VarianceType = intelligibility::VarianceArray::StepType; | 67 using VarianceType = intelligibility::VarianceArray::StepType; |
| 48 | 68 |
| 49 IntelligibilityEnhancer::TransformCallback::TransformCallback( | 69 IntelligibilityEnhancer::TransformCallback::TransformCallback( |
| 50 IntelligibilityEnhancer* parent, | 70 IntelligibilityEnhancer* parent) |
| 51 IntelligibilityEnhancer::AudioSource source) | 71 : parent_(parent) { |
| 52 : parent_(parent), source_(source) { | |
| 53 } | 72 } |
| 54 | 73 |
| 55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( | 74 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
| 56 const complex<float>* const* in_block, | 75 const complex<float>* const* in_block, |
| 57 size_t in_channels, | 76 size_t in_channels, |
| 58 size_t frames, | 77 size_t frames, |
| 59 size_t /* out_channels */, | 78 size_t /* out_channels */, |
| 60 complex<float>* const* out_block) { | 79 complex<float>* const* out_block) { |
| 61 RTC_DCHECK_EQ(parent_->freqs_, frames); | 80 RTC_DCHECK_EQ(parent_->freqs_, frames); |
| 62 for (size_t i = 0; i < in_channels; ++i) { | 81 for (size_t i = 0; i < in_channels; ++i) { |
| 63 parent_->DispatchAudio(source_, in_block[i], out_block[i]); | 82 parent_->ProcessClearBlock(in_block[i], out_block[i]); |
| 64 } | 83 } |
| 65 } | 84 } |
| 66 | 85 |
| 67 IntelligibilityEnhancer::IntelligibilityEnhancer() | 86 IntelligibilityEnhancer::IntelligibilityEnhancer() |
| 68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { | 87 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { |
| 69 } | 88 } |
| 70 | 89 |
| 71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) | 90 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) |
| 72 : freqs_(RealFourier::ComplexLength( | 91 : freqs_(RealFourier::ComplexLength( |
| 73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), | 92 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), |
| 74 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))), | 93 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))), |
| 75 chunk_length_( | 94 chunk_length_( |
| 76 static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)), | 95 static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)), |
| 77 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), | 96 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), |
| 78 sample_rate_hz_(config.sample_rate_hz), | 97 sample_rate_hz_(config.sample_rate_hz), |
| 79 erb_resolution_(kErbResolution), | 98 erb_resolution_(kErbResolution), |
| 80 num_capture_channels_(config.num_capture_channels), | 99 num_capture_channels_(config.num_capture_channels), |
| 81 num_render_channels_(config.num_render_channels), | 100 num_render_channels_(config.num_render_channels), |
| 82 analysis_rate_(config.analysis_rate), | 101 analysis_rate_(config.analysis_rate), |
| 83 active_(true), | 102 active_(true), |
| 84 clear_variance_(freqs_, | 103 clear_variance_(freqs_, |
| 85 config.var_type, | 104 config.var_type, |
| 86 config.var_window_size, | 105 config.var_window_size, |
| 87 config.var_decay_rate), | 106 config.var_decay_rate), |
| 88 noise_variance_(freqs_, | |
| 89 config.var_type, | |
| 90 config.var_window_size, | |
| 91 config.var_decay_rate), | |
| 92 filtered_clear_var_(new float[bank_size_]), | 107 filtered_clear_var_(new float[bank_size_]), |
| 93 filtered_noise_var_(new float[bank_size_]), | 108 filtered_noise_var_(new float[bank_size_]), |
| 94 filter_bank_(bank_size_), | |
| 95 center_freqs_(new float[bank_size_]), | 109 center_freqs_(new float[bank_size_]), |
| 110 render_filter_bank_(CreateErbBank(freqs_)), | |
| 96 rho_(new float[bank_size_]), | 111 rho_(new float[bank_size_]), |
| 97 gains_eq_(new float[bank_size_]), | 112 gains_eq_(new float[bank_size_]), |
| 98 gain_applier_(freqs_, config.gain_change_limit), | 113 gain_applier_(freqs_, config.gain_change_limit), |
| 99 temp_render_out_buffer_(chunk_length_, num_render_channels_), | 114 temp_render_out_buffer_(chunk_length_, num_render_channels_), |
| 100 temp_capture_out_buffer_(chunk_length_, num_capture_channels_), | |
| 101 kbd_window_(new float[window_size_]), | 115 kbd_window_(new float[window_size_]), |
| 102 render_callback_(this, AudioSource::kRenderStream), | 116 render_callback_(this), |
| 103 capture_callback_(this, AudioSource::kCaptureStream), | |
| 104 block_count_(0), | 117 block_count_(0), |
| 105 analysis_step_(0) { | 118 analysis_step_(0) { |
| 106 RTC_DCHECK_LE(config.rho, 1.0f); | 119 RTC_DCHECK_LE(config.rho, 1.0f); |
| 107 | 120 |
| 108 CreateErbBank(); | 121 memset(filtered_clear_var_.get(), |
| 122 0, | |
| 123 bank_size_ * sizeof(filtered_clear_var_[0])); | |
| 124 memset(filtered_noise_var_.get(), | |
| 125 0, | |
| 126 bank_size_ * sizeof(filtered_noise_var_[0])); | |
| 109 | 127 |
| 110 // Assumes all rho equal. | 128 // Assumes all rho equal. |
| 111 for (size_t i = 0; i < bank_size_; ++i) { | 129 for (size_t i = 0; i < bank_size_; ++i) { |
| 112 rho_[i] = config.rho * config.rho; | 130 rho_[i] = config.rho * config.rho; |
| 113 } | 131 } |
| 114 | 132 |
| 115 float freqs_khz = kClipFreq / 1000.0f; | 133 float freqs_khz = kClipFreq / 1000.0f; |
| 116 size_t erb_index = static_cast<size_t>(ceilf( | 134 size_t erb_index = static_cast<size_t>(ceilf( |
| 117 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); | 135 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); |
| 118 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_); | 136 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_); |
| 119 | 137 |
| 120 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, | 138 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, |
| 121 kbd_window_.get()); | 139 kbd_window_.get()); |
| 122 render_mangler_.reset(new LappedTransform( | 140 render_mangler_.reset(new LappedTransform( |
| 123 num_render_channels_, num_render_channels_, chunk_length_, | 141 num_render_channels_, num_render_channels_, chunk_length_, |
| 124 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); | 142 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); |
| 125 capture_mangler_.reset(new LappedTransform( | 143 } |
| 126 num_capture_channels_, num_capture_channels_, chunk_length_, | 144 |
| 127 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); | 145 void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
| 146 std::vector<float> noise) { | |
| 147 if (capture_filter_bank_.size() != bank_size_ || | |
| 148 capture_filter_bank_[0].size() != noise.size()) { | |
| 149 capture_filter_bank_ = CreateErbBank(noise.size()); | |
| 150 } | |
| 151 for (size_t i = 0; i < noise.size(); ++i) { | |
| 152 noise[i] *= noise[i]; | |
| 153 } | |
| 154 FilterVariance(&noise[0], | |
|
turaj
2016/02/09 16:40:33
Bastiaan's idea is one does not need to update IE
aluebs-webrtc
2016/02/09 19:13:35
Good point! I moved the FilterVariance to where it
| |
| 155 capture_filter_bank_, | |
| 156 filtered_noise_var_.get()); | |
| 128 } | 157 } |
| 129 | 158 |
| 130 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 159 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
| 131 int sample_rate_hz, | 160 int sample_rate_hz, |
| 132 size_t num_channels) { | 161 size_t num_channels) { |
| 133 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 162 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
| 134 RTC_CHECK_EQ(num_render_channels_, num_channels); | 163 RTC_CHECK_EQ(num_render_channels_, num_channels); |
| 135 | 164 |
| 136 if (active_) { | 165 if (active_) { |
| 137 render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels()); | 166 render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels()); |
| 138 } | 167 } |
| 139 | 168 |
| 140 if (active_) { | 169 if (active_) { |
| 141 for (size_t i = 0; i < num_render_channels_; ++i) { | 170 for (size_t i = 0; i < num_render_channels_; ++i) { |
| 142 memcpy(audio[i], temp_render_out_buffer_.channels()[i], | 171 memcpy(audio[i], temp_render_out_buffer_.channels()[i], |
| 143 chunk_length_ * sizeof(**audio)); | 172 chunk_length_ * sizeof(**audio)); |
| 144 } | 173 } |
| 145 } | 174 } |
| 146 } | 175 } |
| 147 | 176 |
| 148 void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, | |
| 149 int sample_rate_hz, | |
| 150 size_t num_channels) { | |
| 151 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); | |
| 152 RTC_CHECK_EQ(num_capture_channels_, num_channels); | |
| 153 | |
| 154 capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels()); | |
| 155 } | |
| 156 | |
| 157 void IntelligibilityEnhancer::DispatchAudio( | |
| 158 IntelligibilityEnhancer::AudioSource source, | |
| 159 const complex<float>* in_block, | |
| 160 complex<float>* out_block) { | |
| 161 switch (source) { | |
| 162 case kRenderStream: | |
| 163 ProcessClearBlock(in_block, out_block); | |
| 164 break; | |
| 165 case kCaptureStream: | |
| 166 ProcessNoiseBlock(in_block, out_block); | |
| 167 break; | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, | 177 void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, |
| 172 complex<float>* out_block) { | 178 complex<float>* out_block) { |
| 173 if (block_count_ < 2) { | 179 if (block_count_ < 2) { |
| 174 memset(out_block, 0, freqs_ * sizeof(*out_block)); | 180 memset(out_block, 0, freqs_ * sizeof(*out_block)); |
| 175 ++block_count_; | 181 ++block_count_; |
| 176 return; | 182 return; |
| 177 } | 183 } |
| 178 | 184 |
| 179 // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary. | 185 // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary. |
| 180 if (true) { | 186 if (true) { |
| 181 clear_variance_.Step(in_block, false); | 187 clear_variance_.Step(in_block, false); |
| 182 if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { | 188 if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { |
| 183 const float power_target = std::accumulate( | 189 const float power_target = std::accumulate( |
| 184 clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f); | 190 clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f); |
| 185 AnalyzeClearBlock(power_target); | 191 AnalyzeClearBlock(power_target); |
| 186 ++analysis_step_; | 192 ++analysis_step_; |
| 187 } | 193 } |
| 188 ++block_count_; | 194 ++block_count_; |
| 189 } | 195 } |
| 190 | 196 |
| 191 if (active_) { | 197 if (active_) { |
| 192 gain_applier_.Apply(in_block, out_block); | 198 gain_applier_.Apply(in_block, out_block); |
| 193 } | 199 } |
| 194 } | 200 } |
| 195 | 201 |
| 196 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { | 202 void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { |
| 197 FilterVariance(clear_variance_.variance(), filtered_clear_var_.get()); | 203 FilterVariance(clear_variance_.variance(), |
| 198 FilterVariance(noise_variance_.variance(), filtered_noise_var_.get()); | 204 render_filter_bank_, |
| 199 | 205 filtered_clear_var_.get()); |
| 200 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); | 206 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); |
| 201 const float power_top = | 207 const float power_top = |
| 202 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); | 208 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); |
| 203 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); | 209 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); |
| 204 const float power_bot = | 210 const float power_bot = |
| 205 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); | 211 DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); |
| 206 if (power_target >= power_bot && power_target <= power_top) { | 212 if (power_target >= power_bot && power_target <= power_top) { |
| 207 SolveForLambda(power_target, power_bot, power_top); | 213 SolveForLambda(power_target, power_bot, power_top); |
| 208 UpdateErbGains(); | 214 UpdateErbGains(); |
| 209 } // Else experiencing variance underflow, so do nothing. | 215 } // Else experiencing variance underflow, so do nothing. |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 235 ++iters; | 241 ++iters; |
| 236 } | 242 } |
| 237 } | 243 } |
| 238 | 244 |
| 239 void IntelligibilityEnhancer::UpdateErbGains() { | 245 void IntelligibilityEnhancer::UpdateErbGains() { |
| 240 // (ERB gain) = filterbank' * (freq gain) | 246 // (ERB gain) = filterbank' * (freq gain) |
| 241 float* gains = gain_applier_.target(); | 247 float* gains = gain_applier_.target(); |
| 242 for (size_t i = 0; i < freqs_; ++i) { | 248 for (size_t i = 0; i < freqs_; ++i) { |
| 243 gains[i] = 0.0f; | 249 gains[i] = 0.0f; |
| 244 for (size_t j = 0; j < bank_size_; ++j) { | 250 for (size_t j = 0; j < bank_size_; ++j) { |
| 245 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); | 251 gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]); |
| 246 } | 252 } |
| 247 } | 253 } |
| 248 } | 254 } |
| 249 | 255 |
| 250 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, | |
| 251 complex<float>* /*out_block*/) { | |
| 252 noise_variance_.Step(in_block); | |
| 253 } | |
| 254 | |
| 255 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, | 256 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
| 256 size_t erb_resolution) { | 257 size_t erb_resolution) { |
| 257 float freq_limit = sample_rate / 2000.0f; | 258 float freq_limit = sample_rate / 2000.0f; |
| 258 size_t erb_scale = static_cast<size_t>(ceilf( | 259 size_t erb_scale = static_cast<size_t>(ceilf( |
| 259 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); | 260 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); |
| 260 return erb_scale * erb_resolution; | 261 return erb_scale * erb_resolution; |
| 261 } | 262 } |
| 262 | 263 |
| 263 void IntelligibilityEnhancer::CreateErbBank() { | 264 std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank( |
| 265 size_t num_freqs) { | |
| 266 std::vector<std::vector<float>> filter_bank(bank_size_); | |
| 264 size_t lf = 1, rf = 4; | 267 size_t lf = 1, rf = 4; |
| 265 | 268 |
| 266 for (size_t i = 0; i < bank_size_; ++i) { | 269 for (size_t i = 0; i < bank_size_; ++i) { |
| 267 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); | 270 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); |
| 268 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); | 271 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); |
| 269 center_freqs_[i] -= 14678.49f; | 272 center_freqs_[i] -= 14678.49f; |
| 270 } | 273 } |
| 271 float last_center_freq = center_freqs_[bank_size_ - 1]; | 274 float last_center_freq = center_freqs_[bank_size_ - 1]; |
| 272 for (size_t i = 0; i < bank_size_; ++i) { | 275 for (size_t i = 0; i < bank_size_; ++i) { |
| 273 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; | 276 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; |
| 274 } | 277 } |
| 275 | 278 |
| 276 for (size_t i = 0; i < bank_size_; ++i) { | 279 for (size_t i = 0; i < bank_size_; ++i) { |
| 277 filter_bank_[i].resize(freqs_); | 280 filter_bank[i].resize(num_freqs); |
| 278 } | 281 } |
| 279 | 282 |
| 280 for (size_t i = 1; i <= bank_size_; ++i) { | 283 for (size_t i = 1; i <= bank_size_; ++i) { |
| 281 size_t lll, ll, rr, rrr; | 284 size_t lll, ll, rr, rrr; |
| 282 static const size_t kOne = 1; // Avoids repeated static_cast<>s below. | 285 static const size_t kOne = 1; // Avoids repeated static_cast<>s below. |
| 283 lll = static_cast<size_t>(round( | 286 lll = static_cast<size_t>(round( |
| 284 center_freqs_[max(kOne, i - lf) - 1] * freqs_ / | 287 center_freqs_[max(kOne, i - lf) - 1] * num_freqs / |
| 285 (0.5f * sample_rate_hz_))); | 288 (0.5f * sample_rate_hz_))); |
| 286 ll = static_cast<size_t>(round( | 289 ll = static_cast<size_t>(round( |
| 287 center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_))); | 290 center_freqs_[max(kOne, i) - 1] * num_freqs / |
| 288 lll = min(freqs_, max(lll, kOne)) - 1; | 291 (0.5f * sample_rate_hz_))); |
| 289 ll = min(freqs_, max(ll, kOne)) - 1; | 292 lll = min(num_freqs, max(lll, kOne)) - 1; |
| 293 ll = min(num_freqs, max(ll, kOne)) - 1; | |
| 290 | 294 |
| 291 rrr = static_cast<size_t>(round( | 295 rrr = static_cast<size_t>(round( |
| 292 center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / | 296 center_freqs_[min(bank_size_, i + rf) - 1] * num_freqs / |
| 293 (0.5f * sample_rate_hz_))); | 297 (0.5f * sample_rate_hz_))); |
| 294 rr = static_cast<size_t>(round( | 298 rr = static_cast<size_t>(round( |
| 295 center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / | 299 center_freqs_[min(bank_size_, i + 1) - 1] * num_freqs / |
| 296 (0.5f * sample_rate_hz_))); | 300 (0.5f * sample_rate_hz_))); |
| 297 rrr = min(freqs_, max(rrr, kOne)) - 1; | 301 rrr = min(num_freqs, max(rrr, kOne)) - 1; |
| 298 rr = min(freqs_, max(rr, kOne)) - 1; | 302 rr = min(num_freqs, max(rr, kOne)) - 1; |
| 299 | 303 |
| 300 float step, element; | 304 float step, element; |
| 301 | 305 |
| 302 step = 1.0f / (ll - lll); | 306 step = 1.0f / (ll - lll); |
| 303 element = 0.0f; | 307 element = 0.0f; |
| 304 for (size_t j = lll; j <= ll; ++j) { | 308 for (size_t j = lll; j <= ll; ++j) { |
| 305 filter_bank_[i - 1][j] = element; | 309 filter_bank[i - 1][j] = element; |
| 306 element += step; | 310 element += step; |
| 307 } | 311 } |
| 308 step = 1.0f / (rrr - rr); | 312 step = 1.0f / (rrr - rr); |
| 309 element = 1.0f; | 313 element = 1.0f; |
| 310 for (size_t j = rr; j <= rrr; ++j) { | 314 for (size_t j = rr; j <= rrr; ++j) { |
| 311 filter_bank_[i - 1][j] = element; | 315 filter_bank[i - 1][j] = element; |
| 312 element -= step; | 316 element -= step; |
| 313 } | 317 } |
| 314 for (size_t j = ll; j <= rr; ++j) { | 318 for (size_t j = ll; j <= rr; ++j) { |
| 315 filter_bank_[i - 1][j] = 1.0f; | 319 filter_bank[i - 1][j] = 1.0f; |
| 316 } | 320 } |
| 317 } | 321 } |
| 318 | 322 |
| 319 float sum; | 323 float sum; |
| 320 for (size_t i = 0; i < freqs_; ++i) { | 324 for (size_t i = 0; i < num_freqs; ++i) { |
| 321 sum = 0.0f; | 325 sum = 0.0f; |
| 322 for (size_t j = 0; j < bank_size_; ++j) { | 326 for (size_t j = 0; j < bank_size_; ++j) { |
| 323 sum += filter_bank_[j][i]; | 327 sum += filter_bank[j][i]; |
| 324 } | 328 } |
| 325 for (size_t j = 0; j < bank_size_; ++j) { | 329 for (size_t j = 0; j < bank_size_; ++j) { |
| 326 filter_bank_[j][i] /= sum; | 330 filter_bank[j][i] /= sum; |
| 327 } | 331 } |
| 328 } | 332 } |
| 333 return filter_bank; | |
| 329 } | 334 } |
| 330 | 335 |
| 331 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, | 336 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
| 332 size_t start_freq, | 337 size_t start_freq, |
| 333 float* sols) { | 338 float* sols) { |
| 334 bool quadratic = (kConfigRho < 1.0f); | 339 bool quadratic = (kConfigRho < 1.0f); |
| 335 const float* var_x0 = filtered_clear_var_.get(); | 340 const float* var_x0 = filtered_clear_var_.get(); |
| 336 const float* var_n0 = filtered_noise_var_.get(); | 341 const float* var_n0 = filtered_noise_var_.get(); |
| 337 | 342 |
| 338 for (size_t n = 0; n < start_freq; ++n) { | 343 for (size_t n = 0; n < start_freq; ++n) { |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 349 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; | 354 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; |
| 350 sols[n] = | 355 sols[n] = |
| 351 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); | 356 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); |
| 352 } else { | 357 } else { |
| 353 sols[n] = -gamma0 / beta0; | 358 sols[n] = -gamma0 / beta0; |
| 354 } | 359 } |
| 355 sols[n] = fmax(0, sols[n]); | 360 sols[n] = fmax(0, sols[n]); |
| 356 } | 361 } |
| 357 } | 362 } |
| 358 | 363 |
| 359 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { | |
| 360 RTC_DCHECK_GT(freqs_, 0u); | |
| 361 for (size_t i = 0; i < bank_size_; ++i) { | |
| 362 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); | |
| 363 } | |
| 364 } | |
| 365 | |
| 366 float IntelligibilityEnhancer::DotProduct(const float* a, | |
| 367 const float* b, | |
| 368 size_t length) { | |
| 369 float ret = 0.0f; | |
| 370 | |
| 371 for (size_t i = 0; i < length; ++i) { | |
| 372 ret = fmaf(a[i], b[i], ret); | |
| 373 } | |
| 374 return ret; | |
| 375 } | |
| 376 | |
| 377 bool IntelligibilityEnhancer::active() const { | 364 bool IntelligibilityEnhancer::active() const { |
| 378 return active_; | 365 return active_; |
| 379 } | 366 } |
| 380 | 367 |
| 381 } // namespace webrtc | 368 } // namespace webrtc |
| OLD | NEW |