Chromium Code Reviews

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Drop Beamformer Interface Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff |
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 104 matching lines...)
115 } 115 }
116 116
117 return result; 117 return result;
118 } 118 }
119 119
120 // Works for positive numbers only. 120 // Works for positive numbers only.
121 size_t Round(float x) { 121 size_t Round(float x) {
122 return static_cast<size_t>(std::floor(x + 0.5f)); 122 return static_cast<size_t>(std::floor(x + 0.5f));
123 } 123 }
124 124
125 // Calculates the sum of absolute values of a complex matrix.
126 float SumAbs(const ComplexMatrix<float>& mat) {
127 float sum_abs = 0.f;
128 const complex<float>* const* mat_els = mat.elements();
129 for (size_t i = 0; i < mat.num_rows(); ++i) {
130 for (size_t j = 0; j < mat.num_columns(); ++j) {
131 sum_abs += std::abs(mat_els[i][j]);
132 }
133 }
134 return sum_abs;
135 }
136
137 // Calculates the sum of squares of a complex matrix. 125 // Calculates the sum of squares of a complex matrix.
138 float SumSquares(const ComplexMatrix<float>& mat) { 126 float SumSquares(const ComplexMatrix<float>& mat) {
139 float sum_squares = 0.f; 127 float sum_squares = 0.f;
140 const complex<float>* const* mat_els = mat.elements(); 128 const complex<float>* const* mat_els = mat.elements();
141 for (size_t i = 0; i < mat.num_rows(); ++i) { 129 for (size_t i = 0; i < mat.num_rows(); ++i) {
142 for (size_t j = 0; j < mat.num_columns(); ++j) { 130 for (size_t j = 0; j < mat.num_columns(); ++j) {
143 float abs_value = std::abs(mat_els[i][j]); 131 float abs_value = std::abs(mat_els[i][j]);
144 sum_squares += abs_value * abs_value; 132 sum_squares += abs_value * abs_value;
145 } 133 }
146 } 134 }
(...skipping 29 matching lines...)
176 return array_geometry; 164 return array_geometry;
177 } 165 }
178 166
179 } // namespace 167 } // namespace
180 168
181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); 169 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
182 170
183 // static 171 // static
184 const size_t NonlinearBeamformer::kNumFreqBins; 172 const size_t NonlinearBeamformer::kNumFreqBins;
185 173
174 PostFilterTransform::PostFilterTransform(size_t chunk_length,
175 float* window,
176 size_t fft_size)
177 : transform_(1u, 1u, chunk_length, window, fft_size, fft_size / 2, this),
178 num_freq_bins_(fft_size / 2 + 1) {}
179
180 void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
181 final_mask_ = final_mask;
182 transform_.ProcessChunk(data, data);
183 }
184
185 void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
186 size_t num_input_channels,
187 size_t num_freq_bins,
188 size_t num_output_channels,
189 complex<float>* const* output) {
190 RTC_CHECK_EQ(num_freq_bins_, num_freq_bins);
191 RTC_CHECK_EQ(1u, num_input_channels);
peah-webrtc 2016/06/08 12:04:55 As it is now, the beamformer will crash the applic
aluebs-webrtc 2016/06/09 02:11:46 Constraint removed. Changed to DCHECK.
192 RTC_CHECK_EQ(1u, num_output_channels);
193
194 for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
195 output[0][f_ix] = kCompensationGain * final_mask_[f_ix] * input[0][f_ix];
196 }
197 }
198
186 NonlinearBeamformer::NonlinearBeamformer( 199 NonlinearBeamformer::NonlinearBeamformer(
187 const std::vector<Point>& array_geometry, 200 const std::vector<Point>& array_geometry,
188 SphericalPointf target_direction) 201 SphericalPointf target_direction)
189 : num_input_channels_(array_geometry.size()), 202 : num_input_channels_(array_geometry.size()),
190 array_geometry_(GetCenteredArray(array_geometry)), 203 array_geometry_(GetCenteredArray(array_geometry)),
191 array_normal_(GetArrayNormalIfExists(array_geometry)), 204 array_normal_(GetArrayNormalIfExists(array_geometry)),
192 min_mic_spacing_(GetMinimumSpacing(array_geometry)), 205 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
193 target_angle_radians_(target_direction.azimuth()), 206 target_angle_radians_(target_direction.azimuth()),
194 away_radians_(std::min( 207 away_radians_(std::min(
195 static_cast<float>(M_PI), 208 static_cast<float>(M_PI),
196 std::max(kMinAwayRadians, 209 std::max(kMinAwayRadians,
197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { 210 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); 211 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
199 } 212 }
200 213
201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { 214 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
202 chunk_length_ = 215 chunk_length_ =
203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); 216 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
204 sample_rate_hz_ = sample_rate_hz; 217 sample_rate_hz_ = sample_rate_hz;
205 218
206 high_pass_postfilter_mask_ = 1.f; 219 high_pass_postfilter_mask_ = 1.f;
207 is_target_present_ = false; 220 is_target_present_ = false;
208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; 221 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
209 interference_blocks_count_ = hold_target_blocks_; 222 interference_blocks_count_ = hold_target_blocks_;
210 223
211 lapped_transform_.reset(new LappedTransform(num_input_channels_, 224 process_transform_.reset(new LappedTransform(num_input_channels_,
212 1, 225 0u,
213 chunk_length_, 226 chunk_length_,
214 window_, 227 window_,
215 kFftSize, 228 kFftSize,
216 kFftSize / 2, 229 kFftSize / 2,
217 this)); 230 this));
231 postfilter_transform_.reset(new PostFilterTransform(
232 chunk_length_, window_, kFftSize));
233 dummy_out_.reset(new ChannelBuffer<float>(chunk_length_, 0u));
peah-webrtc 2016/06/08 12:04:55 Why do we need to allocate a dummy output of a ful
aluebs-webrtc 2016/06/09 02:11:46 It doesn't allocate anything, since if there are 0
234 const float wave_number_step =
235 (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
218 for (size_t i = 0; i < kNumFreqBins; ++i) { 236 for (size_t i = 0; i < kNumFreqBins; ++i) {
219 time_smooth_mask_[i] = 1.f; 237 time_smooth_mask_[i] = 1.f;
220 final_mask_[i] = 1.f; 238 final_mask_[i] = 1.f;
221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; 239 wave_numbers_[i] = i * wave_number_step;
222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
223 } 240 }
224 241
225 InitLowFrequencyCorrectionRanges(); 242 InitLowFrequencyCorrectionRanges();
226 InitDiffuseCovMats(); 243 InitDiffuseCovMats();
227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); 244 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
228 } 245 }
229 246
230 // These bin indexes determine the regions over which a mean is taken. This is 247 // These bin indexes determine the regions over which a mean is taken. This is
231 // applied as a constant value over the adjacent end "frequency correction" 248 // applied as a constant value over the adjacent end "frequency correction"
232 // regions. 249 // regions.
(...skipping 66 matching lines...)
299 void NonlinearBeamformer::InitDelaySumMasks() { 316 void NonlinearBeamformer::InitDelaySumMasks() {
300 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { 317 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
301 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); 318 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
302 CovarianceMatrixGenerator::PhaseAlignmentMasks( 319 CovarianceMatrixGenerator::PhaseAlignmentMasks(
303 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, 320 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
304 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); 321 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
305 322
306 complex_f norm_factor = sqrt( 323 complex_f norm_factor = sqrt(
307 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); 324 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
308 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); 325 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
309 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
310 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
311 normalized_delay_sum_masks_[f_ix]));
312 } 326 }
313 } 327 }
314 328
315 void NonlinearBeamformer::InitTargetCovMats() { 329 void NonlinearBeamformer::InitTargetCovMats() {
316 for (size_t i = 0; i < kNumFreqBins; ++i) { 330 for (size_t i = 0; i < kNumFreqBins; ++i) {
317 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); 331 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
318 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); 332 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
319 } 333 }
320 } 334 }
321 335
(...skipping 37 matching lines...)
359 void NonlinearBeamformer::NormalizeCovMats() { 373 void NonlinearBeamformer::NormalizeCovMats() {
360 for (size_t i = 0; i < kNumFreqBins; ++i) { 374 for (size_t i = 0; i < kNumFreqBins; ++i) {
361 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); 375 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
362 rpsiws_[i].clear(); 376 rpsiws_[i].clear();
363 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { 377 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); 378 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
365 } 379 }
366 } 380 }
367 } 381 }
368 382
369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, 383 void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) {
370 ChannelBuffer<float>* output) { 384 RTC_DCHECK_EQ(data.num_channels(), num_input_channels_);
371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); 385 RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_);
372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
373 386
374 float old_high_pass_mask = high_pass_postfilter_mask_; 387 old_high_pass_mask_ = high_pass_postfilter_mask_;
375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); 388 process_transform_->ProcessChunk(data.channels(0), dummy_out_->channels(0));
peah-webrtc 2016/06/08 12:04:55 Do you really need to pass a dummy output buffer h
aluebs-webrtc 2016/06/09 02:11:46 Good point, since I modified lapped_transform to a
376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible 389 }
377 // discontinuities. 390
391 void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
392 RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
393
394 postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
395
396 // Ramp up/down for smoothing is needed in order to avoid discontinuities in
397 // the transitions between 10 ms frames.
378 const float ramp_increment = 398 const float ramp_increment =
379 (high_pass_postfilter_mask_ - old_high_pass_mask) / 399 (high_pass_postfilter_mask_ - old_high_pass_mask_) /
380 input.num_frames_per_band(); 400 data->num_frames_per_band();
381 // Apply the smoothed high-pass mask to the first channel of each band. 401 for (size_t i = 1; i < data->num_bands(); ++i) {
382 // This can be done because the effect of the linear beamformer is negligible 402 float smoothed_mask = old_high_pass_mask_;
383 // compared to the post-filter. 403 for (size_t j = 0; j < data->num_frames_per_band(); ++j) {
384 for (size_t i = 1; i < input.num_bands(); ++i) {
385 float smoothed_mask = old_high_pass_mask;
386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
387 smoothed_mask += ramp_increment; 404 smoothed_mask += ramp_increment;
388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; 405 data->channels(i)[0][j] *= smoothed_mask;
peah-webrtc 2016/06/08 12:04:55 This code assumes single-channel processing, right
aluebs-webrtc 2016/06/09 02:11:46 Adjusted the postfilter to support multichannel da
389 } 406 }
390 } 407 }
391 } 408 }
392 409
393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { 410 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
394 target_angle_radians_ = target_direction.azimuth(); 411 target_angle_radians_ = target_direction.azimuth();
395 InitHighFrequencyCorrectionRanges(); 412 InitHighFrequencyCorrectionRanges();
396 InitInterfAngles(); 413 InitInterfAngles();
397 InitDelaySumMasks(); 414 InitDelaySumMasks();
398 InitTargetCovMats(); 415 InitTargetCovMats();
399 InitInterfCovMats(); 416 InitInterfCovMats();
400 NormalizeCovMats(); 417 NormalizeCovMats();
401 } 418 }
402 419
403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { 420 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
404 // If more than half-beamwidth degrees away from the beam's center, 421 // If more than half-beamwidth degrees away from the beam's center,
405 // you are out of the beam. 422 // you are out of the beam.
406 return fabs(spherical_point.azimuth() - target_angle_radians_) < 423 return fabs(spherical_point.azimuth() - target_angle_radians_) <
407 kHalfBeamWidthRadians; 424 kHalfBeamWidthRadians;
408 } 425 }
409 426
410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, 427 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
411 size_t num_input_channels, 428 size_t num_input_channels,
412 size_t num_freq_bins, 429 size_t num_freq_bins,
413 size_t num_output_channels, 430 size_t num_output_channels,
414 complex_f* const* output) { 431 complex_f* const* output) {
415 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins); 432 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
416 RTC_CHECK_EQ(num_input_channels_, num_input_channels); 433 RTC_CHECK_EQ(num_input_channels_, num_input_channels);
417 RTC_CHECK_EQ(1u, num_output_channels); 434 RTC_CHECK_EQ(0u, num_output_channels);
418 435
419 // Calculating the post-filter masks. Note that we need two for each 436 // Calculating the post-filter masks. Note that we need two for each
420 // frequency bin to account for the positive and negative interferer 437 // frequency bin to account for the positive and negative interferer
421 // angle. 438 // angle.
422 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { 439 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
423 eig_m_.CopyFromColumn(input, i, num_input_channels_); 440 eig_m_.CopyFromColumn(input, i, num_input_channels_);
424 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); 441 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
425 if (eig_m_norm_factor != 0.f) { 442 if (eig_m_norm_factor != 0.f) {
426 eig_m_.Scale(1.f / eig_m_norm_factor); 443 eig_m_.Scale(1.f / eig_m_norm_factor);
427 } 444 }
(...skipping 21 matching lines...)
449 new_mask_[i] = tmp_mask; 466 new_mask_[i] = tmp_mask;
450 } 467 }
451 } 468 }
452 } 469 }
453 470
454 ApplyMaskTimeSmoothing(); 471 ApplyMaskTimeSmoothing();
455 EstimateTargetPresence(); 472 EstimateTargetPresence();
456 ApplyLowFrequencyCorrection(); 473 ApplyLowFrequencyCorrection();
457 ApplyHighFrequencyCorrection(); 474 ApplyHighFrequencyCorrection();
458 ApplyMaskFrequencySmoothing(); 475 ApplyMaskFrequencySmoothing();
459 ApplyMasks(input, output);
460 } 476 }
461 477
462 float NonlinearBeamformer::CalculatePostfilterMask( 478 float NonlinearBeamformer::CalculatePostfilterMask(
463 const ComplexMatrixF& interf_cov_mat, 479 const ComplexMatrixF& interf_cov_mat,
464 float rpsiw, 480 float rpsiw,
465 float ratio_rxiw_rxim, 481 float ratio_rxiw_rxim,
466 float rmw_r) { 482 float rmw_r) {
467 float rpsim = Norm(interf_cov_mat, eig_m_); 483 float rpsim = Norm(interf_cov_mat, eig_m_);
468 484
469 float ratio = 0.f; 485 float ratio = 0.f;
470 if (rpsim > 0.f) { 486 if (rpsim > 0.f) {
471 ratio = rpsiw / rpsim; 487 ratio = rpsiw / rpsim;
472 } 488 }
473 489
474 float numerator = 1.f - kCutOffConstant; 490 float numerator = 1.f - kCutOffConstant;
475 if (rmw_r > 0.f) { 491 if (rmw_r > 0.f) {
476 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); 492 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);
477 } 493 }
478 494
479 float denominator = 1.f - kCutOffConstant; 495 float denominator = 1.f - kCutOffConstant;
480 if (ratio_rxiw_rxim > 0.f) { 496 if (ratio_rxiw_rxim > 0.f) {
481 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); 497 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);
482 } 498 }
483 499
484 return numerator / denominator; 500 return numerator / denominator;
485 } 501 }
486 502
487 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
488 complex_f* const* output) {
489 complex_f* output_channel = output[0];
490 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
491 output_channel[f_ix] = complex_f(0.f, 0.f);
492
493 const complex_f* delay_sum_mask_els =
494 normalized_delay_sum_masks_[f_ix].elements()[0];
495 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
496 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
497 }
498
499 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
500 }
501 }
502
503 // Smooth new_mask_ into time_smooth_mask_. 503 // Smooth new_mask_ into time_smooth_mask_.
504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { 504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { 505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + 506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; 507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
508 } 508 }
509 } 509 }
510 510
511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. 511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { 512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
(...skipping 57 matching lines...)
570 new_mask_ + high_mean_end_bin_ + 1); 570 new_mask_ + high_mean_end_bin_ + 1);
571 if (new_mask_[quantile] > kMaskTargetThreshold) { 571 if (new_mask_[quantile] > kMaskTargetThreshold) {
572 is_target_present_ = true; 572 is_target_present_ = true;
573 interference_blocks_count_ = 0; 573 interference_blocks_count_ = 0;
574 } else { 574 } else {
575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; 575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
576 } 576 }
577 } 577 }
578 578
579 } // namespace webrtc 579 } // namespace webrtc
OLDNEW

Powered by Google App Engine