Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(32)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 array_geometry[i].c[dim] -= center; 173 array_geometry[i].c[dim] -= center;
174 } 174 }
175 } 175 }
176 return array_geometry; 176 return array_geometry;
177 } 177 }
178 178
179 } // namespace 179 } // namespace
180 180
181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); 181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
182 182
183 // static 183 // static
peah-webrtc 2016/05/22 21:06:48 Please correct this comment as well while you are
aluebs-webrtc 2016/05/26 01:04:45 What is wrong with it?
peah-webrtc 2016/05/26 08:48:52 It is not a proper sentence, and not terminated by
aluebs-webrtc 2016/05/28 03:00:00 I think the static definition is clearer like this
peah-webrtc 2016/05/30 11:49:25 I think the guidelines should be applied regardles
aluebs-webrtc 2016/06/01 00:16:34 Acknowledged.
184 const size_t NonlinearBeamformer::kNumFreqBins; 184 const size_t NonlinearBeamformer::kNumFreqBins;
185 185
186 class PostFilterTransform : public LappedTransform::Callback {
peah-webrtc 2016/05/22 21:06:48 This class is a way to be able to use the callback
aluebs-webrtc 2016/05/26 01:04:45 I don't see how this simplifies the code, but I ag
peah-webrtc 2016/05/26 08:48:52 The simplification is separation of concerns and d
aluebs-webrtc 2016/05/28 03:00:00 I think the separation/encapsulation is almost the
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
187 public:
188 explicit PostFilterTransform(NonlinearBeamformer* beamformer)
189 : beamformer_(beamformer) {}
190
191 protected:
192 // Process one frequency-domain block of audio. This is where the fun
peah-webrtc 2016/05/22 21:06:48 Please describe this more thoroughly. I'm not sure
aluebs-webrtc 2016/05/26 01:04:45 Removed comment. It was just to be consistent with
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
193 // happens. Implements LappedTransform::Callback.
194 void ProcessAudioBlock(const complex<float>* const* input,
195 size_t num_input_channels,
196 size_t num_freq_bins,
197 size_t num_output_channels,
198 complex<float>* const* output) override {
199 RTC_CHECK_EQ(NonlinearBeamformer::kNumFreqBins, num_freq_bins);
200 RTC_CHECK_EQ(1u, num_input_channels);
201 RTC_CHECK_EQ(1u, num_output_channels);
202
203 beamformer_->ApplyPostFilter(input[0], output[0]);
204 }
205
206 private:
207 NonlinearBeamformer* beamformer_;
208 };
209
186 NonlinearBeamformer::NonlinearBeamformer( 210 NonlinearBeamformer::NonlinearBeamformer(
187 const std::vector<Point>& array_geometry, 211 const std::vector<Point>& array_geometry,
188 SphericalPointf target_direction) 212 SphericalPointf target_direction)
189 : num_input_channels_(array_geometry.size()), 213 : num_input_channels_(array_geometry.size()),
190 array_geometry_(GetCenteredArray(array_geometry)), 214 array_geometry_(GetCenteredArray(array_geometry)),
191 array_normal_(GetArrayNormalIfExists(array_geometry)), 215 array_normal_(GetArrayNormalIfExists(array_geometry)),
192 min_mic_spacing_(GetMinimumSpacing(array_geometry)), 216 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
193 target_angle_radians_(target_direction.azimuth()), 217 target_angle_radians_(target_direction.azimuth()),
194 away_radians_(std::min( 218 away_radians_(std::min(
195 static_cast<float>(M_PI), 219 static_cast<float>(M_PI),
196 std::max(kMinAwayRadians, 220 std::max(kMinAwayRadians,
197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { 221 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); 222 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
199 } 223 }
200 224
201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { 225 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
202 chunk_length_ = 226 chunk_length_ =
203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); 227 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
204 sample_rate_hz_ = sample_rate_hz; 228 sample_rate_hz_ = sample_rate_hz;
205 229
206 high_pass_postfilter_mask_ = 1.f; 230 high_pass_postfilter_mask_ = 1.f;
207 is_target_present_ = false; 231 is_target_present_ = false;
208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; 232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
209 interference_blocks_count_ = hold_target_blocks_; 233 interference_blocks_count_ = hold_target_blocks_;
210 234
211 lapped_transform_.reset(new LappedTransform(num_input_channels_, 235 process_transform_.reset(new LappedTransform(num_input_channels_,
212 1, 236 1u,
213 chunk_length_, 237 chunk_length_,
214 window_, 238 window_,
215 kFftSize, 239 kFftSize,
216 kFftSize / 2, 240 kFftSize / 2,
217 this)); 241 this));
242 postfilter_transform_.reset(new LappedTransform(
243 1u, 1u, chunk_length_, window_, kFftSize, kFftSize / 2,
244 new PostFilterTransform(this)));
218 for (size_t i = 0; i < kNumFreqBins; ++i) { 245 for (size_t i = 0; i < kNumFreqBins; ++i) {
219 time_smooth_mask_[i] = 1.f; 246 time_smooth_mask_[i] = 1.f;
220 final_mask_[i] = 1.f; 247 final_mask_[i] = 1.f;
221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; 248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
peah-webrtc 2016/05/22 21:06:48 Please change to use a precomputed 1/kFftSize * sa
aluebs-webrtc 2016/05/26 01:04:45 Adds unrelated changes to the CL, but if you think
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; 249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
peah-webrtc 2016/05/22 21:06:48 Please change to use a precomputed 2*pi/kSpeedOfSo
aluebs-webrtc 2016/05/26 01:04:45 Adds unrelated changes to the CL, but if you think
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
223 } 250 }
224 251
225 InitLowFrequencyCorrectionRanges(); 252 InitLowFrequencyCorrectionRanges();
226 InitDiffuseCovMats(); 253 InitDiffuseCovMats();
227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); 254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
228 } 255 }
229 256
230 // These bin indexes determine the regions over which a mean is taken. This is 257 // These bin indexes determine the regions over which a mean is taken. This is
231 // applied as a constant value over the adjacent end "frequency correction" 258 // applied as a constant value over the adjacent end "frequency correction"
232 // regions. 259 // regions.
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); 391 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
365 } 392 }
366 } 393 }
367 } 394 }
368 395
369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, 396 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
370 ChannelBuffer<float>* output) { 397 ChannelBuffer<float>* output) {
371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); 398 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); 399 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
373 400
374 float old_high_pass_mask = high_pass_postfilter_mask_; 401 old_high_pass_mask_ = high_pass_postfilter_mask_;
375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); 402 process_transform_->ProcessChunk(input.channels(0), output->channels(0));
403 // Copy over only the first channel of each band.
peah-webrtc 2016/05/22 21:06:48 Have you checked the impact on the signal when thi
aluebs-webrtc 2016/05/26 01:04:45 This is no longer relevant, since we decided offli
peah-webrtc 2016/05/26 08:48:52 I think the perfect reconstruction may actually be
aluebs-webrtc 2016/05/28 03:00:00 I meant that other components are already non-line
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
404 // This can be done because the effect of the linear beamformer is negligible
405 // compared to the post-filter.
406 for (size_t i = 1; i < input.num_bands(); ++i) {
407 memcpy(output->channels(i)[0],
peah-webrtc 2016/05/22 21:06:48 what happens if the output is dual channel? Since
aluebs-webrtc 2016/05/26 01:04:45 This can't be done as is, since the input and outp
peah-webrtc 2016/05/30 11:49:25 So this means that the input could have 2 channels
aluebs-webrtc 2016/06/01 00:16:34 I don't think that adding an additional interface
peah-webrtc 2016/06/01 14:51:01 It actually does not add code complexity, as it se
aluebs-webrtc 2016/06/01 22:13:20 Interface removed and changed to what input-only.
408 input.channels(i)[0],
409 input.num_frames_per_band() * sizeof(output->channels(i)[0][0]));
410 }
411 }
412
413 void NonlinearBeamformer::PostFilter(const ChannelBuffer<float>& input,
414 ChannelBuffer<float>* output) {
415 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
416
417 postfilter_transform_->ProcessChunk(input.channels(0), output->channels(0));
418
376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible 419 // Ramp up/down for smoothing. 1 mask per 10ms results in audible
peah-webrtc 2016/05/22 21:06:48 I guess, what you mean is that smoothing is needed
aluebs-webrtc 2016/05/26 01:04:45 Adds unrelated changes to the CL, but if you think
peah-webrtc 2016/05/30 11:49:25 Sounds awesome!
377 // discontinuities. 420 // discontinuities.
378 const float ramp_increment = 421 const float ramp_increment =
379 (high_pass_postfilter_mask_ - old_high_pass_mask) / 422 (high_pass_postfilter_mask_ - old_high_pass_mask_) /
380 input.num_frames_per_band(); 423 input.num_frames_per_band();
381 // Apply the smoothed high-pass mask to the first channel of each band.
382 // This can be done because the effect of the linear beamformer is negligible
383 // compared to the post-filter.
384 for (size_t i = 1; i < input.num_bands(); ++i) { 424 for (size_t i = 1; i < input.num_bands(); ++i) {
385 float smoothed_mask = old_high_pass_mask; 425 float smoothed_mask = old_high_pass_mask_;
386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { 426 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
387 smoothed_mask += ramp_increment; 427 smoothed_mask += ramp_increment;
388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; 428 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
389 } 429 }
390 } 430 }
391 } 431 }
392 432
393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { 433 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
394 target_angle_radians_ = target_direction.azimuth(); 434 target_angle_radians_ = target_direction.azimuth();
395 InitHighFrequencyCorrectionRanges(); 435 InitHighFrequencyCorrectionRanges();
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
449 new_mask_[i] = tmp_mask; 489 new_mask_[i] = tmp_mask;
450 } 490 }
451 } 491 }
452 } 492 }
453 493
454 ApplyMaskTimeSmoothing(); 494 ApplyMaskTimeSmoothing();
455 EstimateTargetPresence(); 495 EstimateTargetPresence();
456 ApplyLowFrequencyCorrection(); 496 ApplyLowFrequencyCorrection();
457 ApplyHighFrequencyCorrection(); 497 ApplyHighFrequencyCorrection();
458 ApplyMaskFrequencySmoothing(); 498 ApplyMaskFrequencySmoothing();
459 ApplyMasks(input, output); 499 ApplyDelayAndSum(input, output);
460 } 500 }
461 501
462 float NonlinearBeamformer::CalculatePostfilterMask( 502 float NonlinearBeamformer::CalculatePostfilterMask(
463 const ComplexMatrixF& interf_cov_mat, 503 const ComplexMatrixF& interf_cov_mat,
464 float rpsiw, 504 float rpsiw,
465 float ratio_rxiw_rxim, 505 float ratio_rxiw_rxim,
466 float rmw_r) { 506 float rmw_r) {
467 float rpsim = Norm(interf_cov_mat, eig_m_); 507 float rpsim = Norm(interf_cov_mat, eig_m_);
468 508
469 float ratio = 0.f; 509 float ratio = 0.f;
470 if (rpsim > 0.f) { 510 if (rpsim > 0.f) {
471 ratio = rpsiw / rpsim; 511 ratio = rpsiw / rpsim;
472 } 512 }
473 513
474 float numerator = 1.f - kCutOffConstant; 514 float numerator = 1.f - kCutOffConstant;
475 if (rmw_r > 0.f) { 515 if (rmw_r > 0.f) {
476 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); 516 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);
477 } 517 }
478 518
479 float denominator = 1.f - kCutOffConstant; 519 float denominator = 1.f - kCutOffConstant;
480 if (ratio_rxiw_rxim > 0.f) { 520 if (ratio_rxiw_rxim > 0.f) {
481 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); 521 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);
482 } 522 }
483 523
484 return numerator / denominator; 524 return numerator / denominator;
485 } 525 }
486 526
487 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, 527 void NonlinearBeamformer::ApplyDelayAndSum(const complex_f* const* input,
488 complex_f* const* output) { 528 complex_f* const* output) {
489 complex_f* output_channel = output[0]; 529 complex_f* output_channel = output[0];
490 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { 530 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
491 output_channel[f_ix] = complex_f(0.f, 0.f); 531 output_channel[f_ix] = complex_f(0.f, 0.f);
492 532
493 const complex_f* delay_sum_mask_els = 533 const complex_f* delay_sum_mask_els =
494 normalized_delay_sum_masks_[f_ix].elements()[0]; 534 normalized_delay_sum_masks_[f_ix].elements()[0];
495 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { 535 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
496 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; 536 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
497 } 537 }
498
499 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
500 } 538 }
501 } 539 }
502 540
541 void NonlinearBeamformer::ApplyPostFilter(const complex_f* input,
542 complex_f* output) {
543 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
544 output[f_ix] = kCompensationGain * final_mask_[f_ix] * input[f_ix];
545 }
546 }
547
503 // Smooth new_mask_ into time_smooth_mask_. 548 // Smooth new_mask_ into time_smooth_mask_.
504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { 549 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { 550 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + 551 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; 552 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
508 } 553 }
509 } 554 }
510 555
511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. 556 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { 557 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
570 new_mask_ + high_mean_end_bin_ + 1); 615 new_mask_ + high_mean_end_bin_ + 1);
571 if (new_mask_[quantile] > kMaskTargetThreshold) { 616 if (new_mask_[quantile] > kMaskTargetThreshold) {
572 is_target_present_ = true; 617 is_target_present_ = true;
573 interference_blocks_count_ = 0; 618 interference_blocks_count_ = 0;
574 } else { 619 } else {
575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; 620 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
576 } 621 }
577 } 622 }
578 623
579 } // namespace webrtc 624 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698