OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #define _USE_MATH_DEFINES | 11 #define _USE_MATH_DEFINES |
12 | 12 |
13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" | 13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" |
14 | 14 |
15 #include <algorithm> | 15 #include <algorithm> |
16 #include <cmath> | 16 #include <cmath> |
17 #include <numeric> | 17 #include <numeric> |
18 #include <vector> | 18 #include <vector> |
19 | 19 |
20 #include "webrtc/base/arraysize.h" | 20 #include "webrtc/base/arraysize.h" |
21 #include "webrtc/common_audio/window_generator.h" | 21 #include "webrtc/common_audio/window_generator.h" |
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" | 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" |
23 | 23 |
24 namespace webrtc { | 24 namespace webrtc { |
25 namespace { | 25 namespace { |
26 | 26 |
27 // Alpha for the Kaiser Bessel Derived window. | 27 // Alpha for the Kaiser Bessel Derived window. |
28 const float kKbdAlpha = 1.5f; | 28 const float kKbdAlpha = 1.5f; |
29 | 29 |
30 // The minimum value a post-processing mask can take. | |
31 const float kMaskMinimum = 0.01f; | |
32 | |
33 const float kSpeedOfSoundMeterSeconds = 343; | 30 const float kSpeedOfSoundMeterSeconds = 343; |
34 | 31 |
35 // For both target and interference angles, PI / 2 is perpendicular to the | 32 // For both target and interference angles, PI / 2 is perpendicular to the |
36 // microphone array, facing forwards. The positive direction goes | 33 // microphone array, facing forwards. The positive direction goes |
37 // counterclockwise. | 34 // counterclockwise. |
38 // The angle at which we amplify sound. | 35 // The angle at which we amplify sound. |
36 // TODO(aluebs): Make the target angle dynamically settable. | |
39 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | 37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; |
40 | 38 |
41 // The angle at which we suppress sound. Suppression is symmetric around PI / 2 | |
42 // radians, so sound is suppressed at both +|kInterfAngleRadians| and | |
43 // PI - |kInterfAngleRadians|. Since the beamformer is robust, this should | |
44 // suppress sound coming from close angles as well. | |
45 const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f; | |
46 | |
47 // When calculating the interference covariance matrix, this is the weight for | 39 // When calculating the interference covariance matrix, this is the weight for |
48 // the weighted average between the uniform covariance matrix and the angled | 40 // the weighted average between the uniform covariance matrix and the angled |
49 // covariance matrix. | 41 // covariance matrix. |
50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 42 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) |
51 const float kBalance = 0.4f; | 43 const float kBalance = 0.95f; |
52 | 44 |
53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | 45 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; |
54 | 46 |
55 // TODO(claguna): need comment here. | |
56 const float kBeamwidthConstant = 0.00002f; | |
57 | |
58 // Alpha coefficients for mask smoothing. | 47 // Alpha coefficients for mask smoothing. |
59 const float kMaskTimeSmoothAlpha = 0.2f; | 48 const float kMaskTimeSmoothAlpha = 0.2f; |
60 const float kMaskFrequencySmoothAlpha = 0.6f; | 49 const float kMaskFrequencySmoothAlpha = 0.6f; |
61 | 50 |
62 // The average mask is computed from masks in this mid-frequency range. If these | 51 // The average mask is computed from masks in this mid-frequency range. If these |
63 // ranges are changed |kMaskQuantile| might need to be adjusted. | 52 // ranges are changed |kMaskQuantile| might need to be adjusted. |
64 const int kLowMeanStartHz = 200; | 53 const int kLowMeanStartHz = 200; |
65 const int kLowMeanEndHz = 400; | 54 const int kLowMeanEndHz = 400; |
66 | 55 |
56 // TODO(aluebs): Make the high frequency correction range depend on the target | |
57 // angle. | |
67 const int kHighMeanStartHz = 3000; | 58 const int kHighMeanStartHz = 3000; |
68 const int kHighMeanEndHz = 5000; | 59 const int kHighMeanEndHz = 5000; |
69 | 60 |
61 // Range limiter for subtractive terms in the nominator and denominator of the | |
62 // postfilter expression. It handles the scenario mismatch between the true and | |
63 // model sources (target and interference). | |
64 const float kCutOffConstant = 0.9999; | |
65 | |
70 // Quantile of mask values which is used to estimate target presence. | 66 // Quantile of mask values which is used to estimate target presence. |
71 const float kMaskQuantile = 0.7f; | 67 const float kMaskQuantile = 0.7f; |
72 // Mask threshold over which the data is considered signal and not interference. | 68 // Mask threshold over which the data is considered signal and not interference. |
73 const float kMaskTargetThreshold = 0.3f; | 69 // It has to be updated every time the postfilter calculation is changed |
70 // significantly. | |
71 // TODO(aluebs): Write a tool to tune the target threshold automatically based | |
72 // on files annotated with target and interference ground truth. | |
73 const float kMaskTargetThreshold = 0.01f; | |
74 // Time in seconds after which the data is considered interference if the mask | 74 // Time in seconds after which the data is considered interference if the mask |
75 // does not pass |kMaskTargetThreshold|. | 75 // does not pass |kMaskTargetThreshold|. |
76 const float kHoldTargetSeconds = 0.25f; | 76 const float kHoldTargetSeconds = 0.25f; |
77 | 77 |
78 // To compensate for the attenuation this algorithm introduces to the target | |
79 // signal. It was estimated empirically from a low-noise low-reverberation | |
80 // recording from broadside, since if both channels are exactly the same no | |
Andrew MacDonald
2015/10/13 21:55:16
Perhaps drop the "since if both channels..." part,
aluebs-webrtc
2015/10/16 16:41:14
I dropped it.
| |
81 // attenuation is introduced. | |
82 const float kCompensationGain = 2.f; | |
83 | |
78 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is | 84 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is |
79 // used; to accomplish this, we compute both multiplications in the same loop. | 85 // used; to accomplish this, we compute both multiplications in the same loop. |
80 // The returned norm is clamped to be non-negative. | 86 // The returned norm is clamped to be non-negative. |
81 float Norm(const ComplexMatrix<float>& mat, | 87 float Norm(const ComplexMatrix<float>& mat, |
82 const ComplexMatrix<float>& norm_mat) { | 88 const ComplexMatrix<float>& norm_mat) { |
83 RTC_CHECK_EQ(norm_mat.num_rows(), 1); | 89 RTC_CHECK_EQ(norm_mat.num_rows(), 1); |
84 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); | 90 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); |
85 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); | 91 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); |
86 | 92 |
87 complex<float> first_product = complex<float>(0.f, 0.f); | 93 complex<float> first_product = complex<float>(0.f, 0.f); |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
211 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | 217 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
212 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 218 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
213 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 219 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
214 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 220 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
215 | 221 |
216 high_pass_postfilter_mask_ = 1.f; | 222 high_pass_postfilter_mask_ = 1.f; |
217 is_target_present_ = false; | 223 is_target_present_ = false; |
218 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 224 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
219 interference_blocks_count_ = hold_target_blocks_; | 225 interference_blocks_count_ = hold_target_blocks_; |
220 | 226 |
221 | |
222 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 227 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
223 1, | 228 1, |
224 chunk_length_, | 229 chunk_length_, |
225 window_, | 230 window_, |
226 kFftSize, | 231 kFftSize, |
227 kFftSize / 2, | 232 kFftSize / 2, |
228 this)); | 233 this)); |
229 for (size_t i = 0; i < kNumFreqBins; ++i) { | 234 for (size_t i = 0; i < kNumFreqBins; ++i) { |
230 time_smooth_mask_[i] = 1.f; | 235 time_smooth_mask_[i] = 1.f; |
231 final_mask_[i] = 1.f; | 236 final_mask_[i] = 1.f; |
232 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 237 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
233 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 238 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
234 mask_thresholds_[i] = num_input_channels_ * num_input_channels_ * | |
235 kBeamwidthConstant * wave_numbers_[i] * | |
236 wave_numbers_[i]; | |
237 } | 239 } |
238 | 240 |
239 // Initialize all nonadaptive values before looping through the frames. | 241 // Initialize all nonadaptive values before looping through the frames. |
242 InitInterfAngles(); | |
240 InitDelaySumMasks(); | 243 InitDelaySumMasks(); |
241 InitTargetCovMats(); | 244 InitTargetCovMats(); |
242 InitInterfCovMats(); | 245 InitInterfCovMats(); |
243 | 246 |
244 for (size_t i = 0; i < kNumFreqBins; ++i) { | 247 for (size_t i = 0; i < kNumFreqBins; ++i) { |
245 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 248 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
246 rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]); | 249 rpsiws_[i].clear(); |
247 reflected_rpsiws_[i] = | 250 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
248 Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); | 251 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
252 } | |
249 } | 253 } |
250 } | 254 } |
251 | 255 |
256 void NonlinearBeamformer::InitInterfAngles() { | |
257 // TODO(aluebs): Make kAway dependent on the mic spacing. | |
258 const float kAway = 0.5; | |
Andrew MacDonald
2015/10/13 21:55:16
Does this reduce interferer suppression as well, o
aluebs-webrtc
2015/10/16 16:41:14
The suppression gradual moving away from the cente
| |
259 | |
260 interf_angles_radians_.clear(); | |
261 // TODO(aluebs): When the target angle is settable, make sure the interferer | |
262 // scenarios aren't reflected over the target one for linear geometries. | |
263 interf_angles_radians_.push_back(kTargetAngleRadians - kAway); | |
264 interf_angles_radians_.push_back(kTargetAngleRadians + kAway); | |
265 } | |
266 | |
252 void NonlinearBeamformer::InitDelaySumMasks() { | 267 void NonlinearBeamformer::InitDelaySumMasks() { |
253 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 268 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
254 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 269 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); |
255 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 270 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, |
256 kFftSize, | 271 kFftSize, |
257 sample_rate_hz_, | 272 sample_rate_hz_, |
258 kSpeedOfSoundMeterSeconds, | 273 kSpeedOfSoundMeterSeconds, |
259 array_geometry_, | 274 array_geometry_, |
260 kTargetAngleRadians, | 275 kTargetAngleRadians, |
261 &delay_sum_masks_[f_ix]); | 276 &delay_sum_masks_[f_ix]); |
262 | 277 |
263 complex_f norm_factor = sqrt( | 278 complex_f norm_factor = sqrt( |
264 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 279 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
265 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 280 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
266 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 281 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
267 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 282 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
268 normalized_delay_sum_masks_[f_ix])); | 283 normalized_delay_sum_masks_[f_ix])); |
269 } | 284 } |
270 } | 285 } |
271 | 286 |
272 void NonlinearBeamformer::InitTargetCovMats() { | 287 void NonlinearBeamformer::InitTargetCovMats() { |
273 for (size_t i = 0; i < kNumFreqBins; ++i) { | 288 for (size_t i = 0; i < kNumFreqBins; ++i) { |
274 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 289 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
275 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 290 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); |
276 complex_f normalization_factor = target_cov_mats_[i].Trace(); | |
277 target_cov_mats_[i].Scale(1.f / normalization_factor); | |
278 } | 291 } |
279 } | 292 } |
280 | 293 |
281 void NonlinearBeamformer::InitInterfCovMats() { | 294 void NonlinearBeamformer::InitInterfCovMats() { |
282 for (size_t i = 0; i < kNumFreqBins; ++i) { | 295 for (size_t i = 0; i < kNumFreqBins; ++i) { |
283 interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | |
284 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | 296 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); |
285 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | |
286 | |
287 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | 297 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], |
288 array_geometry_, | 298 array_geometry_, |
289 &uniform_cov_mat); | 299 &uniform_cov_mat); |
290 | 300 complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; |
291 CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds, | |
292 kInterfAngleRadians, | |
293 i, | |
294 kFftSize, | |
295 kNumFreqBins, | |
296 sample_rate_hz_, | |
297 array_geometry_, | |
298 &angled_cov_mat); | |
299 // Normalize matrices before averaging them. | |
300 complex_f normalization_factor = uniform_cov_mat.Trace(); | |
301 uniform_cov_mat.Scale(1.f / normalization_factor); | 301 uniform_cov_mat.Scale(1.f / normalization_factor); |
302 normalization_factor = angled_cov_mat.Trace(); | |
303 angled_cov_mat.Scale(1.f / normalization_factor); | |
304 | |
305 // Average matrices. | |
306 uniform_cov_mat.Scale(1 - kBalance); | 302 uniform_cov_mat.Scale(1 - kBalance); |
307 angled_cov_mat.Scale(kBalance); | 303 interf_cov_mats_[i].clear(); |
308 interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat); | 304 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
309 reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]); | 305 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, |
306 num_input_channels_)); | |
307 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | |
308 CovarianceMatrixGenerator::AngledCovarianceMatrix( | |
309 kSpeedOfSoundMeterSeconds, | |
310 interf_angles_radians_[j], | |
311 i, | |
312 kFftSize, | |
313 kNumFreqBins, | |
314 sample_rate_hz_, | |
315 array_geometry_, | |
316 &angled_cov_mat); | |
317 // Normalize matrices before averaging them. | |
318 normalization_factor = angled_cov_mat.elements()[0][0]; | |
319 angled_cov_mat.Scale(1.f / normalization_factor); | |
320 // Weighted average of matrices. | |
321 angled_cov_mat.Scale(kBalance); | |
322 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); | |
323 } | |
310 } | 324 } |
311 } | 325 } |
312 | 326 |
313 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 327 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
314 ChannelBuffer<float>* output) { | 328 ChannelBuffer<float>* output) { |
315 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 329 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
316 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 330 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
317 | 331 |
318 float old_high_pass_mask = high_pass_postfilter_mask_; | 332 float old_high_pass_mask = high_pass_postfilter_mask_; |
319 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 333 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
369 float rxim = Norm(target_cov_mats_[i], eig_m_); | 383 float rxim = Norm(target_cov_mats_[i], eig_m_); |
370 float ratio_rxiw_rxim = 0.f; | 384 float ratio_rxiw_rxim = 0.f; |
371 if (rxim > 0.f) { | 385 if (rxim > 0.f) { |
372 ratio_rxiw_rxim = rxiws_[i] / rxim; | 386 ratio_rxiw_rxim = rxiws_[i] / rxim; |
373 } | 387 } |
374 | 388 |
375 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); | 389 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); |
376 rmw *= rmw; | 390 rmw *= rmw; |
377 float rmw_r = rmw.real(); | 391 float rmw_r = rmw.real(); |
378 | 392 |
379 new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i], | 393 new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0], |
380 rpsiws_[i], | 394 rpsiws_[i][0], |
381 ratio_rxiw_rxim, | 395 ratio_rxiw_rxim, |
382 rmw_r, | 396 rmw_r); |
383 mask_thresholds_[i]); | 397 for (size_t j = 1; j < interf_angles_radians_.size(); ++j) { |
384 | 398 float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j], |
385 new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i], | 399 rpsiws_[i][j], |
386 reflected_rpsiws_[i], | 400 ratio_rxiw_rxim, |
387 ratio_rxiw_rxim, | 401 rmw_r); |
388 rmw_r, | 402 if (tmp_mask < new_mask_[i]) { |
389 mask_thresholds_[i]); | 403 new_mask_[i] = tmp_mask; |
404 } | |
405 } | |
390 } | 406 } |
391 | 407 |
392 ApplyMaskTimeSmoothing(); | 408 ApplyMaskTimeSmoothing(); |
393 EstimateTargetPresence(); | 409 EstimateTargetPresence(); |
394 ApplyLowFrequencyCorrection(); | 410 ApplyLowFrequencyCorrection(); |
395 ApplyHighFrequencyCorrection(); | 411 ApplyHighFrequencyCorrection(); |
396 ApplyMaskFrequencySmoothing(); | 412 ApplyMaskFrequencySmoothing(); |
397 ApplyMasks(input, output); | 413 ApplyMasks(input, output); |
398 } | 414 } |
399 | 415 |
400 float NonlinearBeamformer::CalculatePostfilterMask( | 416 float NonlinearBeamformer::CalculatePostfilterMask( |
401 const ComplexMatrixF& interf_cov_mat, | 417 const ComplexMatrixF& interf_cov_mat, |
402 float rpsiw, | 418 float rpsiw, |
403 float ratio_rxiw_rxim, | 419 float ratio_rxiw_rxim, |
404 float rmw_r, | 420 float rmw_r) { |
405 float mask_threshold) { | |
406 float rpsim = Norm(interf_cov_mat, eig_m_); | 421 float rpsim = Norm(interf_cov_mat, eig_m_); |
407 | 422 |
408 // Find lambda. | |
409 float ratio = 0.f; | 423 float ratio = 0.f; |
410 if (rpsim > 0.f) { | 424 if (rpsim > 0.f) { |
411 ratio = rpsiw / rpsim; | 425 ratio = rpsiw / rpsim; |
412 } | 426 } |
413 float numerator = rmw_r - ratio; | |
414 float denominator = ratio_rxiw_rxim - ratio; | |
415 | 427 |
416 float mask = 1.f; | 428 return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) / |
417 if (denominator > mask_threshold) { | 429 (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim)); |
418 float lambda = numerator / denominator; | |
419 mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum); | |
420 } | |
421 return mask; | |
422 } | 430 } |
423 | 431 |
424 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, | 432 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
425 complex_f* const* output) { | 433 complex_f* const* output) { |
426 complex_f* output_channel = output[0]; | 434 complex_f* output_channel = output[0]; |
427 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 435 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
428 output_channel[f_ix] = complex_f(0.f, 0.f); | 436 output_channel[f_ix] = complex_f(0.f, 0.f); |
429 | 437 |
430 const complex_f* delay_sum_mask_els = | 438 const complex_f* delay_sum_mask_els = |
431 normalized_delay_sum_masks_[f_ix].elements()[0]; | 439 normalized_delay_sum_masks_[f_ix].elements()[0]; |
432 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { | 440 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
433 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; | 441 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
434 } | 442 } |
435 | 443 |
436 output_channel[f_ix] *= final_mask_[f_ix]; | 444 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix]; |
437 } | 445 } |
438 } | 446 } |
439 | 447 |
440 // Smooth new_mask_ into time_smooth_mask_. | 448 // Smooth new_mask_ into time_smooth_mask_. |
441 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { | 449 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { |
442 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { | 450 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |
443 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + | 451 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + |
444 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; | 452 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; |
445 } | 453 } |
446 } | 454 } |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
507 new_mask_ + high_mean_end_bin_ + 1); | 515 new_mask_ + high_mean_end_bin_ + 1); |
508 if (new_mask_[quantile] > kMaskTargetThreshold) { | 516 if (new_mask_[quantile] > kMaskTargetThreshold) { |
509 is_target_present_ = true; | 517 is_target_present_ = true; |
510 interference_blocks_count_ = 0; | 518 interference_blocks_count_ = 0; |
511 } else { | 519 } else { |
512 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 520 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
513 } | 521 } |
514 } | 522 } |
515 | 523 |
516 } // namespace webrtc | 524 } // namespace webrtc |
OLD | NEW |