| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 #include <numeric> | 23 #include <numeric> |
| 24 | 24 |
| 25 #include "webrtc/base/checks.h" | 25 #include "webrtc/base/checks.h" |
| 26 #include "webrtc/common_audio/include/audio_util.h" | 26 #include "webrtc/common_audio/include/audio_util.h" |
| 27 #include "webrtc/common_audio/window_generator.h" | 27 #include "webrtc/common_audio/window_generator.h" |
| 28 | 28 |
| 29 namespace webrtc { | 29 namespace webrtc { |
| 30 | 30 |
| 31 namespace { | 31 namespace { |
| 32 | 32 |
| 33 const int kErbResolution = 2; | 33 const size_t kErbResolution = 2; |
| 34 const int kWindowSizeMs = 2; | 34 const int kWindowSizeMs = 2; |
| 35 const int kChunkSizeMs = 10; // Size provided by APM. | 35 const int kChunkSizeMs = 10; // Size provided by APM. |
| 36 const float kClipFreq = 200.0f; | 36 const float kClipFreq = 200.0f; |
| 37 const float kConfigRho = 0.02f; // Default production and interpretation SNR. | 37 const float kConfigRho = 0.02f; // Default production and interpretation SNR. |
| 38 const float kKbdAlpha = 1.5f; | 38 const float kKbdAlpha = 1.5f; |
| 39 const float kLambdaBot = -1.0f; // Extreme values in bisection | 39 const float kLambdaBot = -1.0f; // Extreme values in bisection |
| 40 const float kLambdaTop = -10e-18f; // search for lamda. | 40 const float kLambdaTop = -10e-18f; // search for lamda. |
| 41 | 41 |
| 42 } // namespace | 42 } // namespace |
| 43 | 43 |
| 44 using std::complex; | 44 using std::complex; |
| 45 using std::max; | 45 using std::max; |
| 46 using std::min; | 46 using std::min; |
| 47 using VarianceType = intelligibility::VarianceArray::StepType; | 47 using VarianceType = intelligibility::VarianceArray::StepType; |
| 48 | 48 |
| 49 IntelligibilityEnhancer::TransformCallback::TransformCallback( | 49 IntelligibilityEnhancer::TransformCallback::TransformCallback( |
| 50 IntelligibilityEnhancer* parent, | 50 IntelligibilityEnhancer* parent, |
| 51 IntelligibilityEnhancer::AudioSource source) | 51 IntelligibilityEnhancer::AudioSource source) |
| 52 : parent_(parent), source_(source) { | 52 : parent_(parent), source_(source) { |
| 53 } | 53 } |
| 54 | 54 |
| 55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( | 55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
| 56 const complex<float>* const* in_block, | 56 const complex<float>* const* in_block, |
| 57 int in_channels, | 57 int in_channels, |
| 58 int frames, | 58 size_t frames, |
| 59 int /* out_channels */, | 59 int /* out_channels */, |
| 60 complex<float>* const* out_block) { | 60 complex<float>* const* out_block) { |
| 61 DCHECK_EQ(parent_->freqs_, frames); | 61 DCHECK_EQ(parent_->freqs_, frames); |
| 62 for (int i = 0; i < in_channels; ++i) { | 62 for (int i = 0; i < in_channels; ++i) { |
| 63 parent_->DispatchAudio(source_, in_block[i], out_block[i]); | 63 parent_->DispatchAudio(source_, in_block[i], out_block[i]); |
| 64 } | 64 } |
| 65 } | 65 } |
| 66 | 66 |
| 67 IntelligibilityEnhancer::IntelligibilityEnhancer() | 67 IntelligibilityEnhancer::IntelligibilityEnhancer() |
| 68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { | 68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { |
| 69 } | 69 } |
| 70 | 70 |
| 71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) | 71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) |
| 72 : freqs_(RealFourier::ComplexLength( | 72 : freqs_(RealFourier::ComplexLength( |
| 73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), | 73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), |
| 74 window_size_(1 << RealFourier::FftOrder(freqs_)), | 74 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))), |
| 75 chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000), | 75 chunk_length_( |
| 76 static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)), |
| 76 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), | 77 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), |
| 77 sample_rate_hz_(config.sample_rate_hz), | 78 sample_rate_hz_(config.sample_rate_hz), |
| 78 erb_resolution_(kErbResolution), | 79 erb_resolution_(kErbResolution), |
| 79 num_capture_channels_(config.num_capture_channels), | 80 num_capture_channels_(config.num_capture_channels), |
| 80 num_render_channels_(config.num_render_channels), | 81 num_render_channels_(config.num_render_channels), |
| 81 analysis_rate_(config.analysis_rate), | 82 analysis_rate_(config.analysis_rate), |
| 82 active_(true), | 83 active_(true), |
| 83 clear_variance_(freqs_, | 84 clear_variance_(freqs_, |
| 84 config.var_type, | 85 config.var_type, |
| 85 config.var_window_size, | 86 config.var_window_size, |
| (...skipping 14 matching lines...) Expand all Loading... |
| 100 kbd_window_(new float[window_size_]), | 101 kbd_window_(new float[window_size_]), |
| 101 render_callback_(this, AudioSource::kRenderStream), | 102 render_callback_(this, AudioSource::kRenderStream), |
| 102 capture_callback_(this, AudioSource::kCaptureStream), | 103 capture_callback_(this, AudioSource::kCaptureStream), |
| 103 block_count_(0), | 104 block_count_(0), |
| 104 analysis_step_(0) { | 105 analysis_step_(0) { |
| 105 DCHECK_LE(config.rho, 1.0f); | 106 DCHECK_LE(config.rho, 1.0f); |
| 106 | 107 |
| 107 CreateErbBank(); | 108 CreateErbBank(); |
| 108 | 109 |
| 109 // Assumes all rho equal. | 110 // Assumes all rho equal. |
| 110 for (int i = 0; i < bank_size_; ++i) { | 111 for (size_t i = 0; i < bank_size_; ++i) { |
| 111 rho_[i] = config.rho * config.rho; | 112 rho_[i] = config.rho * config.rho; |
| 112 } | 113 } |
| 113 | 114 |
| 114 float freqs_khz = kClipFreq / 1000.0f; | 115 float freqs_khz = kClipFreq / 1000.0f; |
| 115 int erb_index = static_cast<int>(ceilf( | 116 size_t erb_index = static_cast<size_t>(ceilf( |
| 116 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); | 117 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); |
| 117 start_freq_ = max(1, erb_index * erb_resolution_); | 118 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_); |
| 118 | 119 |
| 119 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, | 120 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, |
| 120 kbd_window_.get()); | 121 kbd_window_.get()); |
| 121 render_mangler_.reset(new LappedTransform( | 122 render_mangler_.reset(new LappedTransform( |
| 122 num_render_channels_, num_render_channels_, chunk_length_, | 123 num_render_channels_, num_render_channels_, chunk_length_, |
| 123 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); | 124 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); |
| 124 capture_mangler_.reset(new LappedTransform( | 125 capture_mangler_.reset(new LappedTransform( |
| 125 num_capture_channels_, num_capture_channels_, chunk_length_, | 126 num_capture_channels_, num_capture_channels_, chunk_length_, |
| 126 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); | 127 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); |
| 127 } | 128 } |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 231 lambda_top = lambda; | 232 lambda_top = lambda; |
| 232 } | 233 } |
| 233 power_ratio = std::fabs(power * reciprocal_power_target); | 234 power_ratio = std::fabs(power * reciprocal_power_target); |
| 234 ++iters; | 235 ++iters; |
| 235 } | 236 } |
| 236 } | 237 } |
| 237 | 238 |
| 238 void IntelligibilityEnhancer::UpdateErbGains() { | 239 void IntelligibilityEnhancer::UpdateErbGains() { |
| 239 // (ERB gain) = filterbank' * (freq gain) | 240 // (ERB gain) = filterbank' * (freq gain) |
| 240 float* gains = gain_applier_.target(); | 241 float* gains = gain_applier_.target(); |
| 241 for (int i = 0; i < freqs_; ++i) { | 242 for (size_t i = 0; i < freqs_; ++i) { |
| 242 gains[i] = 0.0f; | 243 gains[i] = 0.0f; |
| 243 for (int j = 0; j < bank_size_; ++j) { | 244 for (size_t j = 0; j < bank_size_; ++j) { |
| 244 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); | 245 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); |
| 245 } | 246 } |
| 246 } | 247 } |
| 247 } | 248 } |
| 248 | 249 |
| 249 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, | 250 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, |
| 250 complex<float>* /*out_block*/) { | 251 complex<float>* /*out_block*/) { |
| 251 noise_variance_.Step(in_block); | 252 noise_variance_.Step(in_block); |
| 252 } | 253 } |
| 253 | 254 |
| 254 int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) { | 255 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
| 256 size_t erb_resolution) { |
| 255 float freq_limit = sample_rate / 2000.0f; | 257 float freq_limit = sample_rate / 2000.0f; |
| 256 int erb_scale = ceilf( | 258 size_t erb_scale = static_cast<size_t>(ceilf( |
| 257 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f); | 259 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); |
| 258 return erb_scale * erb_resolution; | 260 return erb_scale * erb_resolution; |
| 259 } | 261 } |
| 260 | 262 |
| 261 void IntelligibilityEnhancer::CreateErbBank() { | 263 void IntelligibilityEnhancer::CreateErbBank() { |
| 262 int lf = 1, rf = 4; | 264 size_t lf = 1, rf = 4; |
| 263 | 265 |
| 264 for (int i = 0; i < bank_size_; ++i) { | 266 for (size_t i = 0; i < bank_size_; ++i) { |
| 265 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); | 267 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_)); |
| 266 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); | 268 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); |
| 267 center_freqs_[i] -= 14678.49f; | 269 center_freqs_[i] -= 14678.49f; |
| 268 } | 270 } |
| 269 float last_center_freq = center_freqs_[bank_size_ - 1]; | 271 float last_center_freq = center_freqs_[bank_size_ - 1]; |
| 270 for (int i = 0; i < bank_size_; ++i) { | 272 for (size_t i = 0; i < bank_size_; ++i) { |
| 271 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; | 273 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; |
| 272 } | 274 } |
| 273 | 275 |
| 274 for (int i = 0; i < bank_size_; ++i) { | 276 for (size_t i = 0; i < bank_size_; ++i) { |
| 275 filter_bank_[i].resize(freqs_); | 277 filter_bank_[i].resize(freqs_); |
| 276 } | 278 } |
| 277 | 279 |
| 278 for (int i = 1; i <= bank_size_; ++i) { | 280 for (size_t i = 1; i <= bank_size_; ++i) { |
| 279 int lll, ll, rr, rrr; | 281 size_t lll, ll, rr, rrr; |
| 280 lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ / | 282 static const size_t kOne = 1; // Avoids repeated static_cast<>s below. |
| 281 (0.5f * sample_rate_hz_)); | 283 lll = static_cast<size_t>(round( |
| 282 ll = | 284 center_freqs_[max(kOne, i - lf) - 1] * freqs_ / |
| 283 round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)); | 285 (0.5f * sample_rate_hz_))); |
| 284 lll = min(freqs_, max(lll, 1)) - 1; | 286 ll = static_cast<size_t>(round( |
| 285 ll = min(freqs_, max(ll, 1)) - 1; | 287 center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_))); |
| 288 lll = min(freqs_, max(lll, kOne)) - 1; |
| 289 ll = min(freqs_, max(ll, kOne)) - 1; |
| 286 | 290 |
| 287 rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / | 291 rrr = static_cast<size_t>(round( |
| 288 (0.5f * sample_rate_hz_)); | 292 center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / |
| 289 rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / | 293 (0.5f * sample_rate_hz_))); |
| 290 (0.5f * sample_rate_hz_)); | 294 rr = static_cast<size_t>(round( |
| 291 rrr = min(freqs_, max(rrr, 1)) - 1; | 295 center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / |
| 292 rr = min(freqs_, max(rr, 1)) - 1; | 296 (0.5f * sample_rate_hz_))); |
| 297 rrr = min(freqs_, max(rrr, kOne)) - 1; |
| 298 rr = min(freqs_, max(rr, kOne)) - 1; |
| 293 | 299 |
| 294 float step, element; | 300 float step, element; |
| 295 | 301 |
| 296 step = 1.0f / (ll - lll); | 302 step = 1.0f / (ll - lll); |
| 297 element = 0.0f; | 303 element = 0.0f; |
| 298 for (int j = lll; j <= ll; ++j) { | 304 for (size_t j = lll; j <= ll; ++j) { |
| 299 filter_bank_[i - 1][j] = element; | 305 filter_bank_[i - 1][j] = element; |
| 300 element += step; | 306 element += step; |
| 301 } | 307 } |
| 302 step = 1.0f / (rrr - rr); | 308 step = 1.0f / (rrr - rr); |
| 303 element = 1.0f; | 309 element = 1.0f; |
| 304 for (int j = rr; j <= rrr; ++j) { | 310 for (size_t j = rr; j <= rrr; ++j) { |
| 305 filter_bank_[i - 1][j] = element; | 311 filter_bank_[i - 1][j] = element; |
| 306 element -= step; | 312 element -= step; |
| 307 } | 313 } |
| 308 for (int j = ll; j <= rr; ++j) { | 314 for (size_t j = ll; j <= rr; ++j) { |
| 309 filter_bank_[i - 1][j] = 1.0f; | 315 filter_bank_[i - 1][j] = 1.0f; |
| 310 } | 316 } |
| 311 } | 317 } |
| 312 | 318 |
| 313 float sum; | 319 float sum; |
| 314 for (int i = 0; i < freqs_; ++i) { | 320 for (size_t i = 0; i < freqs_; ++i) { |
| 315 sum = 0.0f; | 321 sum = 0.0f; |
| 316 for (int j = 0; j < bank_size_; ++j) { | 322 for (size_t j = 0; j < bank_size_; ++j) { |
| 317 sum += filter_bank_[j][i]; | 323 sum += filter_bank_[j][i]; |
| 318 } | 324 } |
| 319 for (int j = 0; j < bank_size_; ++j) { | 325 for (size_t j = 0; j < bank_size_; ++j) { |
| 320 filter_bank_[j][i] /= sum; | 326 filter_bank_[j][i] /= sum; |
| 321 } | 327 } |
| 322 } | 328 } |
| 323 } | 329 } |
| 324 | 330 |
| 325 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, | 331 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
| 326 int start_freq, | 332 size_t start_freq, |
| 327 float* sols) { | 333 float* sols) { |
| 328 bool quadratic = (kConfigRho < 1.0f); | 334 bool quadratic = (kConfigRho < 1.0f); |
| 329 const float* var_x0 = filtered_clear_var_.get(); | 335 const float* var_x0 = filtered_clear_var_.get(); |
| 330 const float* var_n0 = filtered_noise_var_.get(); | 336 const float* var_n0 = filtered_noise_var_.get(); |
| 331 | 337 |
| 332 for (int n = 0; n < start_freq; ++n) { | 338 for (size_t n = 0; n < start_freq; ++n) { |
| 333 sols[n] = 1.0f; | 339 sols[n] = 1.0f; |
| 334 } | 340 } |
| 335 | 341 |
| 336 // Analytic solution for optimal gains. See paper for derivation. | 342 // Analytic solution for optimal gains. See paper for derivation. |
| 337 for (int n = start_freq - 1; n < bank_size_; ++n) { | 343 for (size_t n = start_freq - 1; n < bank_size_; ++n) { |
| 338 float alpha0, beta0, gamma0; | 344 float alpha0, beta0, gamma0; |
| 339 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + | 345 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] + |
| 340 lambda * var_x0[n] * var_n0[n] * var_n0[n]; | 346 lambda * var_x0[n] * var_n0[n] * var_n0[n]; |
| 341 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n]; | 347 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n]; |
| 342 if (quadratic) { | 348 if (quadratic) { |
| 343 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; | 349 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n]; |
| 344 sols[n] = | 350 sols[n] = |
| 345 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); | 351 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0); |
| 346 } else { | 352 } else { |
| 347 sols[n] = -gamma0 / beta0; | 353 sols[n] = -gamma0 / beta0; |
| 348 } | 354 } |
| 349 sols[n] = fmax(0, sols[n]); | 355 sols[n] = fmax(0, sols[n]); |
| 350 } | 356 } |
| 351 } | 357 } |
| 352 | 358 |
| 353 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { | 359 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { |
| 354 DCHECK_GT(freqs_, 0); | 360 DCHECK_GT(freqs_, 0u); |
| 355 for (int i = 0; i < bank_size_; ++i) { | 361 for (size_t i = 0; i < bank_size_; ++i) { |
| 356 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); | 362 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); |
| 357 } | 363 } |
| 358 } | 364 } |
| 359 | 365 |
| 360 float IntelligibilityEnhancer::DotProduct(const float* a, | 366 float IntelligibilityEnhancer::DotProduct(const float* a, |
| 361 const float* b, | 367 const float* b, |
| 362 int length) { | 368 size_t length) { |
| 363 float ret = 0.0f; | 369 float ret = 0.0f; |
| 364 | 370 |
| 365 for (int i = 0; i < length; ++i) { | 371 for (size_t i = 0; i < length; ++i) { |
| 366 ret = fmaf(a[i], b[i], ret); | 372 ret = fmaf(a[i], b[i], ret); |
| 367 } | 373 } |
| 368 return ret; | 374 return ret; |
| 369 } | 375 } |
| 370 | 376 |
| 371 bool IntelligibilityEnhancer::active() const { | 377 bool IntelligibilityEnhancer::active() const { |
| 372 return active_; | 378 return active_; |
| 373 } | 379 } |
| 374 | 380 |
| 375 } // namespace webrtc | 381 } // namespace webrtc |
| OLD | NEW |