Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 162 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 173 array_geometry[i].c[dim] -= center; | 173 array_geometry[i].c[dim] -= center; |
| 174 } | 174 } |
| 175 } | 175 } |
| 176 return array_geometry; | 176 return array_geometry; |
| 177 } | 177 } |
| 178 | 178 |
| 179 } // namespace | 179 } // namespace |
| 180 | 180 |
| 181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); | 181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); |
| 182 | 182 |
| 183 // static | 183 // static |
|
peah-webrtc
2016/05/22 21:06:48
Please correct this comment as well while you are
aluebs-webrtc
2016/05/26 01:04:45
What is wrong with it?
peah-webrtc
2016/05/26 08:48:52
It is not a proper sentence, and not terminated by
aluebs-webrtc
2016/05/28 03:00:00
I think the static definition is clearer like this
peah-webrtc
2016/05/30 11:49:25
I think the guidelines should be applied regardless
aluebs-webrtc
2016/06/01 00:16:34
Acknowledged.
| |
| 184 const size_t NonlinearBeamformer::kNumFreqBins; | 184 const size_t NonlinearBeamformer::kNumFreqBins; |
| 185 | 185 |
| 186 class PostFilterTransform : public LappedTransform::Callback { | |
|
peah-webrtc
2016/05/22 21:06:48
This class is a way to be able to use the callback
aluebs-webrtc
2016/05/26 01:04:45
I don't see how this simplifies the code, but I ag
peah-webrtc
2016/05/26 08:48:52
The simplification is separation of concerns and d
aluebs-webrtc
2016/05/28 03:00:00
I think the separation/encapsulation is almost the
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
| |
| 187 public: | |
| 188 explicit PostFilterTransform(NonlinearBeamformer* beamformer) | |
| 189 : beamformer_(beamformer) {} | |
| 190 | |
| 191 protected: | |
| 192 // Process one frequency-domain block of audio. This is where the fun | |
|
peah-webrtc
2016/05/22 21:06:48
Please describe this more thoroughly. I'm not sure
aluebs-webrtc
2016/05/26 01:04:45
Removed comment. It was just to be consistent with
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
| |
| 193 // happens. Implements LappedTransform::Callback. | |
| 194 void ProcessAudioBlock(const complex<float>* const* input, | |
| 195 size_t num_input_channels, | |
| 196 size_t num_freq_bins, | |
| 197 size_t num_output_channels, | |
| 198 complex<float>* const* output) override { | |
| 199 RTC_CHECK_EQ(NonlinearBeamformer::kNumFreqBins, num_freq_bins); | |
| 200 RTC_CHECK_EQ(1u, num_input_channels); | |
| 201 RTC_CHECK_EQ(1u, num_output_channels); | |
| 202 | |
| 203 beamformer_->ApplyPostFilter(input[0], output[0]); | |
| 204 } | |
| 205 | |
| 206 private: | |
| 207 NonlinearBeamformer* beamformer_; | |
| 208 }; | |
| 209 | |
| 186 NonlinearBeamformer::NonlinearBeamformer( | 210 NonlinearBeamformer::NonlinearBeamformer( |
| 187 const std::vector<Point>& array_geometry, | 211 const std::vector<Point>& array_geometry, |
| 188 SphericalPointf target_direction) | 212 SphericalPointf target_direction) |
| 189 : num_input_channels_(array_geometry.size()), | 213 : num_input_channels_(array_geometry.size()), |
| 190 array_geometry_(GetCenteredArray(array_geometry)), | 214 array_geometry_(GetCenteredArray(array_geometry)), |
| 191 array_normal_(GetArrayNormalIfExists(array_geometry)), | 215 array_normal_(GetArrayNormalIfExists(array_geometry)), |
| 192 min_mic_spacing_(GetMinimumSpacing(array_geometry)), | 216 min_mic_spacing_(GetMinimumSpacing(array_geometry)), |
| 193 target_angle_radians_(target_direction.azimuth()), | 217 target_angle_radians_(target_direction.azimuth()), |
| 194 away_radians_(std::min( | 218 away_radians_(std::min( |
| 195 static_cast<float>(M_PI), | 219 static_cast<float>(M_PI), |
| 196 std::max(kMinAwayRadians, | 220 std::max(kMinAwayRadians, |
| 197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { | 221 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { |
| 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 222 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
| 199 } | 223 } |
| 200 | 224 |
| 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 225 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| 202 chunk_length_ = | 226 chunk_length_ = |
| 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 227 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
| 204 sample_rate_hz_ = sample_rate_hz; | 228 sample_rate_hz_ = sample_rate_hz; |
| 205 | 229 |
| 206 high_pass_postfilter_mask_ = 1.f; | 230 high_pass_postfilter_mask_ = 1.f; |
| 207 is_target_present_ = false; | 231 is_target_present_ = false; |
| 208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
| 209 interference_blocks_count_ = hold_target_blocks_; | 233 interference_blocks_count_ = hold_target_blocks_; |
| 210 | 234 |
| 211 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 235 process_transform_.reset(new LappedTransform(num_input_channels_, |
| 212 1, | 236 1u, |
| 213 chunk_length_, | 237 chunk_length_, |
| 214 window_, | 238 window_, |
| 215 kFftSize, | 239 kFftSize, |
| 216 kFftSize / 2, | 240 kFftSize / 2, |
| 217 this)); | 241 this)); |
| 242 postfilter_transform_.reset(new LappedTransform( | |
| 243 1u, 1u, chunk_length_, window_, kFftSize, kFftSize / 2, | |
| 244 new PostFilterTransform(this))); | |
| 218 for (size_t i = 0; i < kNumFreqBins; ++i) { | 245 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 219 time_smooth_mask_[i] = 1.f; | 246 time_smooth_mask_[i] = 1.f; |
| 220 final_mask_[i] = 1.f; | 247 final_mask_[i] = 1.f; |
| 221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
|
peah-webrtc
2016/05/22 21:06:48
Please change to use a precomputed 1/kFftSize * sa
aluebs-webrtc
2016/05/26 01:04:45
Adds unrelated changes to the CL, but if you think
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
| |
| 222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
|
peah-webrtc
2016/05/22 21:06:48
Please change to use a precomputed 2*pi/kSpeedOfSo
aluebs-webrtc
2016/05/26 01:04:45
Adds unrelated changes to the CL, but if you think
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
| |
| 223 } | 250 } |
| 224 | 251 |
| 225 InitLowFrequencyCorrectionRanges(); | 252 InitLowFrequencyCorrectionRanges(); |
| 226 InitDiffuseCovMats(); | 253 InitDiffuseCovMats(); |
| 227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); | 254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); |
| 228 } | 255 } |
| 229 | 256 |
| 230 // These bin indexes determine the regions over which a mean is taken. This is | 257 // These bin indexes determine the regions over which a mean is taken. This is |
| 231 // applied as a constant value over the adjacent end "frequency correction" | 258 // applied as a constant value over the adjacent end "frequency correction" |
| 232 // regions. | 259 // regions. |
| (...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | 391 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
| 365 } | 392 } |
| 366 } | 393 } |
| 367 } | 394 } |
| 368 | 395 |
| 369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 396 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
| 370 ChannelBuffer<float>* output) { | 397 ChannelBuffer<float>* output) { |
| 371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 398 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
| 372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 399 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
| 373 | 400 |
| 374 float old_high_pass_mask = high_pass_postfilter_mask_; | 401 old_high_pass_mask_ = high_pass_postfilter_mask_; |
| 375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 402 process_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| 403 // Copy over only the first channel of each band. | |
|
peah-webrtc
2016/05/22 21:06:48
Have you checked the impact on the signal when thi
aluebs-webrtc
2016/05/26 01:04:45
This is no longer relevant, since we decided offli
peah-webrtc
2016/05/26 08:48:52
I think the perfect reconstruction may actually be
aluebs-webrtc
2016/05/28 03:00:00
I meant that other components are already non-line
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
| |
| 404 // This can be done because the effect of the linear beamformer is negligible | |
| 405 // compared to the post-filter. | |
| 406 for (size_t i = 1; i < input.num_bands(); ++i) { | |
| 407 memcpy(output->channels(i)[0], | |
|
peah-webrtc
2016/05/22 21:06:48
what happens if the output is dual channel? Since
aluebs-webrtc
2016/05/26 01:04:45
This can't be done as is, since the input and outp
peah-webrtc
2016/05/30 11:49:25
So this means that the input could have 2 channels
aluebs-webrtc
2016/06/01 00:16:34
I don't think that adding an additional interface
peah-webrtc
2016/06/01 14:51:01
It actually does not add code complexity, as it se
aluebs-webrtc
2016/06/01 22:13:20
Interface removed and changed to what input-only.
| |
| 408 input.channels(i)[0], | |
| 409 input.num_frames_per_band() * sizeof(output->channels(i)[0][0])); | |
| 410 } | |
| 411 } | |
| 412 | |
| 413 void NonlinearBeamformer::PostFilter(const ChannelBuffer<float>& input, | |
| 414 ChannelBuffer<float>* output) { | |
| 415 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | |
| 416 | |
| 417 postfilter_transform_->ProcessChunk(input.channels(0), output->channels(0)); | |
| 418 | |
| 376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 419 // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
|
peah-webrtc
2016/05/22 21:06:48
I guess, what you mean is that smoothing is needed
aluebs-webrtc
2016/05/26 01:04:45
Adds unrelated changes to the CL, but if you think
peah-webrtc
2016/05/30 11:49:25
Sounds awesome!
| |
| 377 // discontinuities. | 420 // discontinuities. |
| 378 const float ramp_increment = | 421 const float ramp_increment = |
| 379 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 422 (high_pass_postfilter_mask_ - old_high_pass_mask_) / |
| 380 input.num_frames_per_band(); | 423 input.num_frames_per_band(); |
| 381 // Apply the smoothed high-pass mask to the first channel of each band. | |
| 382 // This can be done because the effect of the linear beamformer is negligible | |
| 383 // compared to the post-filter. | |
| 384 for (size_t i = 1; i < input.num_bands(); ++i) { | 424 for (size_t i = 1; i < input.num_bands(); ++i) { |
| 385 float smoothed_mask = old_high_pass_mask; | 425 float smoothed_mask = old_high_pass_mask_; |
| 386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { | 426 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
| 387 smoothed_mask += ramp_increment; | 427 smoothed_mask += ramp_increment; |
| 388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; | 428 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; |
| 389 } | 429 } |
| 390 } | 430 } |
| 391 } | 431 } |
| 392 | 432 |
| 393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { | 433 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { |
| 394 target_angle_radians_ = target_direction.azimuth(); | 434 target_angle_radians_ = target_direction.azimuth(); |
| 395 InitHighFrequencyCorrectionRanges(); | 435 InitHighFrequencyCorrectionRanges(); |
| (...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 449 new_mask_[i] = tmp_mask; | 489 new_mask_[i] = tmp_mask; |
| 450 } | 490 } |
| 451 } | 491 } |
| 452 } | 492 } |
| 453 | 493 |
| 454 ApplyMaskTimeSmoothing(); | 494 ApplyMaskTimeSmoothing(); |
| 455 EstimateTargetPresence(); | 495 EstimateTargetPresence(); |
| 456 ApplyLowFrequencyCorrection(); | 496 ApplyLowFrequencyCorrection(); |
| 457 ApplyHighFrequencyCorrection(); | 497 ApplyHighFrequencyCorrection(); |
| 458 ApplyMaskFrequencySmoothing(); | 498 ApplyMaskFrequencySmoothing(); |
| 459 ApplyMasks(input, output); | 499 ApplyDelayAndSum(input, output); |
| 460 } | 500 } |
| 461 | 501 |
| 462 float NonlinearBeamformer::CalculatePostfilterMask( | 502 float NonlinearBeamformer::CalculatePostfilterMask( |
| 463 const ComplexMatrixF& interf_cov_mat, | 503 const ComplexMatrixF& interf_cov_mat, |
| 464 float rpsiw, | 504 float rpsiw, |
| 465 float ratio_rxiw_rxim, | 505 float ratio_rxiw_rxim, |
| 466 float rmw_r) { | 506 float rmw_r) { |
| 467 float rpsim = Norm(interf_cov_mat, eig_m_); | 507 float rpsim = Norm(interf_cov_mat, eig_m_); |
| 468 | 508 |
| 469 float ratio = 0.f; | 509 float ratio = 0.f; |
| 470 if (rpsim > 0.f) { | 510 if (rpsim > 0.f) { |
| 471 ratio = rpsiw / rpsim; | 511 ratio = rpsiw / rpsim; |
| 472 } | 512 } |
| 473 | 513 |
| 474 float numerator = 1.f - kCutOffConstant; | 514 float numerator = 1.f - kCutOffConstant; |
| 475 if (rmw_r > 0.f) { | 515 if (rmw_r > 0.f) { |
| 476 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); | 516 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); |
| 477 } | 517 } |
| 478 | 518 |
| 479 float denominator = 1.f - kCutOffConstant; | 519 float denominator = 1.f - kCutOffConstant; |
| 480 if (ratio_rxiw_rxim > 0.f) { | 520 if (ratio_rxiw_rxim > 0.f) { |
| 481 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); | 521 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); |
| 482 } | 522 } |
| 483 | 523 |
| 484 return numerator / denominator; | 524 return numerator / denominator; |
| 485 } | 525 } |
| 486 | 526 |
| 487 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, | 527 void NonlinearBeamformer::ApplyDelayAndSum(const complex_f* const* input, |
| 488 complex_f* const* output) { | 528 complex_f* const* output) { |
| 489 complex_f* output_channel = output[0]; | 529 complex_f* output_channel = output[0]; |
| 490 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 530 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| 491 output_channel[f_ix] = complex_f(0.f, 0.f); | 531 output_channel[f_ix] = complex_f(0.f, 0.f); |
| 492 | 532 |
| 493 const complex_f* delay_sum_mask_els = | 533 const complex_f* delay_sum_mask_els = |
| 494 normalized_delay_sum_masks_[f_ix].elements()[0]; | 534 normalized_delay_sum_masks_[f_ix].elements()[0]; |
| 495 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { | 535 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
| 496 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; | 536 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
| 497 } | 537 } |
| 498 | |
| 499 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix]; | |
| 500 } | 538 } |
| 501 } | 539 } |
| 502 | 540 |
| 541 void NonlinearBeamformer::ApplyPostFilter(const complex_f* input, | |
| 542 complex_f* output) { | |
| 543 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | |
| 544 output[f_ix] = kCompensationGain * final_mask_[f_ix] * input[f_ix]; | |
| 545 } | |
| 546 } | |
| 547 | |
| 503 // Smooth new_mask_ into time_smooth_mask_. | 548 // Smooth new_mask_ into time_smooth_mask_. |
| 504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { | 549 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { |
| 505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { | 550 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |
| 506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + | 551 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + |
| 507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; | 552 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; |
| 508 } | 553 } |
| 509 } | 554 } |
| 510 | 555 |
| 511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. | 556 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. |
| 512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { | 557 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { |
| (...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 570 new_mask_ + high_mean_end_bin_ + 1); | 615 new_mask_ + high_mean_end_bin_ + 1); |
| 571 if (new_mask_[quantile] > kMaskTargetThreshold) { | 616 if (new_mask_[quantile] > kMaskTargetThreshold) { |
| 572 is_target_present_ = true; | 617 is_target_present_ = true; |
| 573 interference_blocks_count_ = 0; | 618 interference_blocks_count_ = 0; |
| 574 } else { | 619 } else { |
| 575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 620 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
| 576 } | 621 } |
| 577 } | 622 } |
| 578 | 623 |
| 579 } // namespace webrtc | 624 } // namespace webrtc |
| OLD | NEW |