Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 2110503002: Revert "Pull out the PostFilter to its own NonlinearBeamformer API" (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 } 115 }
116 116
117 return result; 117 return result;
118 } 118 }
119 119
120 // Works for positive numbers only. 120 // Works for positive numbers only.
121 size_t Round(float x) { 121 size_t Round(float x) {
122 return static_cast<size_t>(std::floor(x + 0.5f)); 122 return static_cast<size_t>(std::floor(x + 0.5f));
123 } 123 }
124 124
125 // Calculates the sum of absolute values of a complex matrix.
126 float SumAbs(const ComplexMatrix<float>& mat) {
127 float sum_abs = 0.f;
128 const complex<float>* const* mat_els = mat.elements();
129 for (size_t i = 0; i < mat.num_rows(); ++i) {
130 for (size_t j = 0; j < mat.num_columns(); ++j) {
131 sum_abs += std::abs(mat_els[i][j]);
132 }
133 }
134 return sum_abs;
135 }
136
125 // Calculates the sum of squares of a complex matrix. 137 // Calculates the sum of squares of a complex matrix.
126 float SumSquares(const ComplexMatrix<float>& mat) { 138 float SumSquares(const ComplexMatrix<float>& mat) {
127 float sum_squares = 0.f; 139 float sum_squares = 0.f;
128 const complex<float>* const* mat_els = mat.elements(); 140 const complex<float>* const* mat_els = mat.elements();
129 for (size_t i = 0; i < mat.num_rows(); ++i) { 141 for (size_t i = 0; i < mat.num_rows(); ++i) {
130 for (size_t j = 0; j < mat.num_columns(); ++j) { 142 for (size_t j = 0; j < mat.num_columns(); ++j) {
131 float abs_value = std::abs(mat_els[i][j]); 143 float abs_value = std::abs(mat_els[i][j]);
132 sum_squares += abs_value * abs_value; 144 sum_squares += abs_value * abs_value;
133 } 145 }
134 } 146 }
(...skipping 29 matching lines...) Expand all
164 return array_geometry; 176 return array_geometry;
165 } 177 }
166 178
167 } // namespace 179 } // namespace
168 180
169 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); 181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
170 182
171 // static 183 // static
172 const size_t NonlinearBeamformer::kNumFreqBins; 184 const size_t NonlinearBeamformer::kNumFreqBins;
173 185
174 PostFilterTransform::PostFilterTransform(size_t num_channels,
175 size_t chunk_length,
176 float* window,
177 size_t fft_size)
178 : transform_(num_channels,
179 num_channels,
180 chunk_length,
181 window,
182 fft_size,
183 fft_size / 2,
184 this),
185 num_freq_bins_(fft_size / 2 + 1) {}
186
187 void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
188 final_mask_ = final_mask;
189 transform_.ProcessChunk(data, data);
190 }
191
192 void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
193 size_t num_input_channels,
194 size_t num_freq_bins,
195 size_t num_output_channels,
196 complex<float>* const* output) {
197 RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins);
198 RTC_DCHECK_EQ(num_input_channels, num_output_channels);
199
200 for (size_t ch = 0; ch < num_input_channels; ++ch) {
201 for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
202 output[ch][f_ix] =
203 kCompensationGain * final_mask_[f_ix] * input[ch][f_ix];
204 }
205 }
206 }
207
208 NonlinearBeamformer::NonlinearBeamformer( 186 NonlinearBeamformer::NonlinearBeamformer(
209 const std::vector<Point>& array_geometry, 187 const std::vector<Point>& array_geometry,
210 size_t num_postfilter_channels,
211 SphericalPointf target_direction) 188 SphericalPointf target_direction)
212 : num_input_channels_(array_geometry.size()), 189 : num_input_channels_(array_geometry.size()),
213 num_postfilter_channels_(num_postfilter_channels),
214 array_geometry_(GetCenteredArray(array_geometry)), 190 array_geometry_(GetCenteredArray(array_geometry)),
215 array_normal_(GetArrayNormalIfExists(array_geometry)), 191 array_normal_(GetArrayNormalIfExists(array_geometry)),
216 min_mic_spacing_(GetMinimumSpacing(array_geometry)), 192 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
217 target_angle_radians_(target_direction.azimuth()), 193 target_angle_radians_(target_direction.azimuth()),
218 away_radians_(std::min( 194 away_radians_(std::min(
219 static_cast<float>(M_PI), 195 static_cast<float>(M_PI),
220 std::max(kMinAwayRadians, 196 std::max(kMinAwayRadians,
221 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { 197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
222 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
223 } 199 }
224 200
225 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
226 chunk_length_ = 202 chunk_length_ =
227 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
228 sample_rate_hz_ = sample_rate_hz; 204 sample_rate_hz_ = sample_rate_hz;
229 205
230 high_pass_postfilter_mask_ = 1.f; 206 high_pass_postfilter_mask_ = 1.f;
231 is_target_present_ = false; 207 is_target_present_ = false;
232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; 208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
233 interference_blocks_count_ = hold_target_blocks_; 209 interference_blocks_count_ = hold_target_blocks_;
234 210
235 process_transform_.reset(new LappedTransform(num_input_channels_, 211 lapped_transform_.reset(new LappedTransform(num_input_channels_,
236 0u, 212 1,
237 chunk_length_, 213 chunk_length_,
238 window_, 214 window_,
239 kFftSize, 215 kFftSize,
240 kFftSize / 2, 216 kFftSize / 2,
241 this)); 217 this));
242 postfilter_transform_.reset(new PostFilterTransform(
243 num_postfilter_channels_, chunk_length_, window_, kFftSize));
244 const float wave_number_step =
245 (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
246 for (size_t i = 0; i < kNumFreqBins; ++i) { 218 for (size_t i = 0; i < kNumFreqBins; ++i) {
247 time_smooth_mask_[i] = 1.f; 219 time_smooth_mask_[i] = 1.f;
248 final_mask_[i] = 1.f; 220 final_mask_[i] = 1.f;
249 wave_numbers_[i] = i * wave_number_step; 221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
250 } 223 }
251 224
252 InitLowFrequencyCorrectionRanges(); 225 InitLowFrequencyCorrectionRanges();
253 InitDiffuseCovMats(); 226 InitDiffuseCovMats();
254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); 227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
255 } 228 }
256 229
257 // These bin indexes determine the regions over which a mean is taken. This is 230 // These bin indexes determine the regions over which a mean is taken. This is
258 // applied as a constant value over the adjacent end "frequency correction" 231 // applied as a constant value over the adjacent end "frequency correction"
259 // regions. 232 // regions.
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 void NonlinearBeamformer::InitDelaySumMasks() { 299 void NonlinearBeamformer::InitDelaySumMasks() {
327 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { 300 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
328 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); 301 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
329 CovarianceMatrixGenerator::PhaseAlignmentMasks( 302 CovarianceMatrixGenerator::PhaseAlignmentMasks(
330 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, 303 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
331 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); 304 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
332 305
333 complex_f norm_factor = sqrt( 306 complex_f norm_factor = sqrt(
334 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); 307 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
335 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); 308 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
309 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
310 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
311 normalized_delay_sum_masks_[f_ix]));
336 } 312 }
337 } 313 }
338 314
339 void NonlinearBeamformer::InitTargetCovMats() { 315 void NonlinearBeamformer::InitTargetCovMats() {
340 for (size_t i = 0; i < kNumFreqBins; ++i) { 316 for (size_t i = 0; i < kNumFreqBins; ++i) {
341 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); 317 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
342 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); 318 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
343 } 319 }
344 } 320 }
345 321
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
383 void NonlinearBeamformer::NormalizeCovMats() { 359 void NonlinearBeamformer::NormalizeCovMats() {
384 for (size_t i = 0; i < kNumFreqBins; ++i) { 360 for (size_t i = 0; i < kNumFreqBins; ++i) {
385 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); 361 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
386 rpsiws_[i].clear(); 362 rpsiws_[i].clear();
387 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { 363 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
388 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); 364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
389 } 365 }
390 } 366 }
391 } 367 }
392 368
393 void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) { 369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
394 RTC_DCHECK_EQ(data.num_channels(), num_input_channels_); 370 ChannelBuffer<float>* output) {
395 RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_); 371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
396 373
397 old_high_pass_mask_ = high_pass_postfilter_mask_; 374 float old_high_pass_mask = high_pass_postfilter_mask_;
398 process_transform_->ProcessChunk(data.channels(0), nullptr); 375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
399 } 376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible
400 377 // discontinuities.
401 void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
402 RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
403 // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated.
404 RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_);
405
406 postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
407
408 // Ramp up/down for smoothing is needed in order to avoid discontinuities in
409 // the transitions between 10 ms frames.
410 const float ramp_increment = 378 const float ramp_increment =
411 (high_pass_postfilter_mask_ - old_high_pass_mask_) / 379 (high_pass_postfilter_mask_ - old_high_pass_mask) /
412 data->num_frames_per_band(); 380 input.num_frames_per_band();
413 for (size_t i = 1; i < data->num_bands(); ++i) { 381 // Apply the smoothed high-pass mask to the first channel of each band.
414 float smoothed_mask = old_high_pass_mask_; 382 // This can be done because the effect of the linear beamformer is negligible
415 for (size_t j = 0; j < data->num_frames_per_band(); ++j) { 383 // compared to the post-filter.
384 for (size_t i = 1; i < input.num_bands(); ++i) {
385 float smoothed_mask = old_high_pass_mask;
386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
416 smoothed_mask += ramp_increment; 387 smoothed_mask += ramp_increment;
417 for (size_t k = 0; k < num_postfilter_channels_; ++k) { 388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
418 data->channels(i)[k][j] *= smoothed_mask;
419 }
420 } 389 }
421 } 390 }
422 } 391 }
423 392
424 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { 393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
425 target_angle_radians_ = target_direction.azimuth(); 394 target_angle_radians_ = target_direction.azimuth();
426 InitHighFrequencyCorrectionRanges(); 395 InitHighFrequencyCorrectionRanges();
427 InitInterfAngles(); 396 InitInterfAngles();
428 InitDelaySumMasks(); 397 InitDelaySumMasks();
429 InitTargetCovMats(); 398 InitTargetCovMats();
430 InitInterfCovMats(); 399 InitInterfCovMats();
431 NormalizeCovMats(); 400 NormalizeCovMats();
432 } 401 }
433 402
434 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { 403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
435 // If more than half-beamwidth degrees away from the beam's center, 404 // If more than half-beamwidth degrees away from the beam's center,
436 // you are out of the beam. 405 // you are out of the beam.
437 return fabs(spherical_point.azimuth() - target_angle_radians_) < 406 return fabs(spherical_point.azimuth() - target_angle_radians_) <
438 kHalfBeamWidthRadians; 407 kHalfBeamWidthRadians;
439 } 408 }
440 409
441 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, 410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
442 size_t num_input_channels, 411 size_t num_input_channels,
443 size_t num_freq_bins, 412 size_t num_freq_bins,
444 size_t num_output_channels, 413 size_t num_output_channels,
445 complex_f* const* output) { 414 complex_f* const* output) {
446 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins); 415 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
447 RTC_CHECK_EQ(num_input_channels_, num_input_channels); 416 RTC_CHECK_EQ(num_input_channels_, num_input_channels);
448 RTC_CHECK_EQ(0u, num_output_channels); 417 RTC_CHECK_EQ(1u, num_output_channels);
449 418
450 // Calculating the post-filter masks. Note that we need two for each 419 // Calculating the post-filter masks. Note that we need two for each
451 // frequency bin to account for the positive and negative interferer 420 // frequency bin to account for the positive and negative interferer
452 // angle. 421 // angle.
453 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { 422 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
454 eig_m_.CopyFromColumn(input, i, num_input_channels_); 423 eig_m_.CopyFromColumn(input, i, num_input_channels_);
455 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); 424 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
456 if (eig_m_norm_factor != 0.f) { 425 if (eig_m_norm_factor != 0.f) {
457 eig_m_.Scale(1.f / eig_m_norm_factor); 426 eig_m_.Scale(1.f / eig_m_norm_factor);
458 } 427 }
(...skipping 21 matching lines...) Expand all
480 new_mask_[i] = tmp_mask; 449 new_mask_[i] = tmp_mask;
481 } 450 }
482 } 451 }
483 } 452 }
484 453
485 ApplyMaskTimeSmoothing(); 454 ApplyMaskTimeSmoothing();
486 EstimateTargetPresence(); 455 EstimateTargetPresence();
487 ApplyLowFrequencyCorrection(); 456 ApplyLowFrequencyCorrection();
488 ApplyHighFrequencyCorrection(); 457 ApplyHighFrequencyCorrection();
489 ApplyMaskFrequencySmoothing(); 458 ApplyMaskFrequencySmoothing();
459 ApplyMasks(input, output);
490 } 460 }
491 461
492 float NonlinearBeamformer::CalculatePostfilterMask( 462 float NonlinearBeamformer::CalculatePostfilterMask(
493 const ComplexMatrixF& interf_cov_mat, 463 const ComplexMatrixF& interf_cov_mat,
494 float rpsiw, 464 float rpsiw,
495 float ratio_rxiw_rxim, 465 float ratio_rxiw_rxim,
496 float rmw_r) { 466 float rmw_r) {
497 float rpsim = Norm(interf_cov_mat, eig_m_); 467 float rpsim = Norm(interf_cov_mat, eig_m_);
498 468
499 float ratio = 0.f; 469 float ratio = 0.f;
500 if (rpsim > 0.f) { 470 if (rpsim > 0.f) {
501 ratio = rpsiw / rpsim; 471 ratio = rpsiw / rpsim;
502 } 472 }
503 473
504 float numerator = 1.f - kCutOffConstant; 474 float numerator = 1.f - kCutOffConstant;
505 if (rmw_r > 0.f) { 475 if (rmw_r > 0.f) {
506 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); 476 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);
507 } 477 }
508 478
509 float denominator = 1.f - kCutOffConstant; 479 float denominator = 1.f - kCutOffConstant;
510 if (ratio_rxiw_rxim > 0.f) { 480 if (ratio_rxiw_rxim > 0.f) {
511 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); 481 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);
512 } 482 }
513 483
514 return numerator / denominator; 484 return numerator / denominator;
515 } 485 }
516 486
487 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
488 complex_f* const* output) {
489 complex_f* output_channel = output[0];
490 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
491 output_channel[f_ix] = complex_f(0.f, 0.f);
492
493 const complex_f* delay_sum_mask_els =
494 normalized_delay_sum_masks_[f_ix].elements()[0];
495 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
496 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
497 }
498
499 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
500 }
501 }
502
517 // Smooth new_mask_ into time_smooth_mask_. 503 // Smooth new_mask_ into time_smooth_mask_.
518 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { 504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
519 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { 505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
520 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + 506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
521 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; 507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
522 } 508 }
523 } 509 }
524 510
525 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. 511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
526 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { 512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
584 new_mask_ + high_mean_end_bin_ + 1); 570 new_mask_ + high_mean_end_bin_ + 1);
585 if (new_mask_[quantile] > kMaskTargetThreshold) { 571 if (new_mask_[quantile] > kMaskTargetThreshold) {
586 is_target_present_ = true; 572 is_target_present_ = true;
587 interference_blocks_count_ = 0; 573 interference_blocks_count_ = 0;
588 } else { 574 } else {
589 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; 575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
590 } 576 }
591 } 577 }
592 578
593 } // namespace webrtc 579 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698