OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #define _USE_MATH_DEFINES | 11 #define _USE_MATH_DEFINES |
12 | 12 |
13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" | 13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" |
14 | 14 |
15 #include <algorithm> | 15 #include <algorithm> |
16 #include <cmath> | 16 #include <cmath> |
17 #include <numeric> | 17 #include <numeric> |
18 #include <vector> | 18 #include <vector> |
19 | 19 |
20 #include "webrtc/base/arraysize.h" | 20 #include "webrtc/base/arraysize.h" |
21 #include "webrtc/common_audio/window_generator.h" | 21 #include "webrtc/common_audio/window_generator.h" |
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator
.h" | 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator
.h" |
23 | 23 |
24 namespace webrtc { | 24 namespace webrtc { |
25 namespace { | 25 namespace { |
26 | 26 |
27 // Alpha for the Kaiser Bessel Derived window. | 27 // Alpha for the Kaiser Bessel Derived window. |
28 const float kKbdAlpha = 1.5f; | 28 const float kKbdAlpha = 1.5f; |
29 | 29 |
30 // The minimum value a post-processing mask can take. | |
31 const float kMaskMinimum = 0.01f; | |
32 | |
33 const float kSpeedOfSoundMeterSeconds = 343; | 30 const float kSpeedOfSoundMeterSeconds = 343; |
34 | 31 |
35 // For both target and interference angles, PI / 2 is perpendicular to the | 32 // For both target and interference angles, PI / 2 is perpendicular to the |
36 // microphone array, facing forwards. The positive direction goes | 33 // microphone array, facing forwards. The positive direction goes |
37 // counterclockwise. | 34 // counterclockwise. |
38 // The angle at which we amplify sound. | 35 // The angle at which we amplify sound. |
| 36 // TODO(aluebs): Make the target angle dynamically settable. |
39 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | 37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; |
40 | 38 |
41 // The angle at which we suppress sound. Suppression is symmetric around PI / 2 | |
42 // radians, so sound is suppressed at both +|kInterfAngleRadians| and | |
43 // PI - |kInterfAngleRadians|. Since the beamformer is robust, this should | |
44 // suppress sound coming from close angles as well. | |
45 const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f; | |
46 | |
47 // When calculating the interference covariance matrix, this is the weight for | 39 // When calculating the interference covariance matrix, this is the weight for |
48 // the weighted average between the uniform covariance matrix and the angled | 40 // the weighted average between the uniform covariance matrix and the angled |
49 // covariance matrix. | 41 // covariance matrix. |
50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 42 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) |
51 const float kBalance = 0.4f; | 43 const float kBalance = 0.95f; |
52 | 44 |
53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | 45 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; |
54 | 46 |
55 // TODO(claguna): need comment here. | |
56 const float kBeamwidthConstant = 0.00002f; | |
57 | |
58 // Alpha coefficients for mask smoothing. | 47 // Alpha coefficients for mask smoothing. |
59 const float kMaskTimeSmoothAlpha = 0.2f; | 48 const float kMaskTimeSmoothAlpha = 0.2f; |
60 const float kMaskFrequencySmoothAlpha = 0.6f; | 49 const float kMaskFrequencySmoothAlpha = 0.6f; |
61 | 50 |
62 // The average mask is computed from masks in this mid-frequency range. If these | 51 // The average mask is computed from masks in this mid-frequency range. If these |
63 // ranges are changed |kMaskQuantile| might need to be adjusted. | 52 // ranges are changed |kMaskQuantile| might need to be adjusted. |
64 const int kLowMeanStartHz = 200; | 53 const int kLowMeanStartHz = 200; |
65 const int kLowMeanEndHz = 400; | 54 const int kLowMeanEndHz = 400; |
66 | 55 |
| 56 // TODO(aluebs): Make the high frequency correction range depend on the target |
| 57 // angle. |
67 const int kHighMeanStartHz = 3000; | 58 const int kHighMeanStartHz = 3000; |
68 const int kHighMeanEndHz = 5000; | 59 const int kHighMeanEndHz = 5000; |
69 | 60 |
| 61 // Range limiter for subtractive terms in the nominator and denominator of the |
| 62 // postfilter expression. It handles the scenario mismatch between the true and |
| 63 // model sources (target and interference). |
| 64 const float kCutOffConstant = 0.9999f; |
| 65 |
70 // Quantile of mask values which is used to estimate target presence. | 66 // Quantile of mask values which is used to estimate target presence. |
71 const float kMaskQuantile = 0.7f; | 67 const float kMaskQuantile = 0.7f; |
72 // Mask threshold over which the data is considered signal and not interference. | 68 // Mask threshold over which the data is considered signal and not interference. |
73 const float kMaskTargetThreshold = 0.3f; | 69 // It has to be updated every time the postfilter calculation is changed |
| 70 // significantly. |
| 71 // TODO(aluebs): Write a tool to tune the target threshold automatically based |
| 72 // on files annotated with target and interference ground truth. |
| 73 const float kMaskTargetThreshold = 0.01f; |
74 // Time in seconds after which the data is considered interference if the mask | 74 // Time in seconds after which the data is considered interference if the mask |
75 // does not pass |kMaskTargetThreshold|. | 75 // does not pass |kMaskTargetThreshold|. |
76 const float kHoldTargetSeconds = 0.25f; | 76 const float kHoldTargetSeconds = 0.25f; |
77 | 77 |
| 78 // To compensate for the attenuation this algorithm introduces to the target |
| 79 // signal. It was estimated empirically from a low-noise low-reverberation |
| 80 // recording from broadside. |
| 81 const float kCompensationGain = 2.f; |
| 82 |
78 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is | 83 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is |
79 // used; to accomplish this, we compute both multiplications in the same loop. | 84 // used; to accomplish this, we compute both multiplications in the same loop. |
80 // The returned norm is clamped to be non-negative. | 85 // The returned norm is clamped to be non-negative. |
81 float Norm(const ComplexMatrix<float>& mat, | 86 float Norm(const ComplexMatrix<float>& mat, |
82 const ComplexMatrix<float>& norm_mat) { | 87 const ComplexMatrix<float>& norm_mat) { |
83 RTC_CHECK_EQ(norm_mat.num_rows(), 1); | 88 RTC_CHECK_EQ(norm_mat.num_rows(), 1); |
84 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); | 89 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); |
85 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); | 90 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); |
86 | 91 |
87 complex<float> first_product = complex<float>(0.f, 0.f); | 92 complex<float> first_product = complex<float>(0.f, 0.f); |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
211 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | 216 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
212 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 217 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
213 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 218 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
214 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 219 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
215 | 220 |
216 high_pass_postfilter_mask_ = 1.f; | 221 high_pass_postfilter_mask_ = 1.f; |
217 is_target_present_ = false; | 222 is_target_present_ = false; |
218 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 223 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
219 interference_blocks_count_ = hold_target_blocks_; | 224 interference_blocks_count_ = hold_target_blocks_; |
220 | 225 |
221 | |
222 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 226 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
223 1, | 227 1, |
224 chunk_length_, | 228 chunk_length_, |
225 window_, | 229 window_, |
226 kFftSize, | 230 kFftSize, |
227 kFftSize / 2, | 231 kFftSize / 2, |
228 this)); | 232 this)); |
229 for (size_t i = 0; i < kNumFreqBins; ++i) { | 233 for (size_t i = 0; i < kNumFreqBins; ++i) { |
230 time_smooth_mask_[i] = 1.f; | 234 time_smooth_mask_[i] = 1.f; |
231 final_mask_[i] = 1.f; | 235 final_mask_[i] = 1.f; |
232 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 236 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
233 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 237 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
234 mask_thresholds_[i] = num_input_channels_ * num_input_channels_ * | |
235 kBeamwidthConstant * wave_numbers_[i] * | |
236 wave_numbers_[i]; | |
237 } | 238 } |
238 | 239 |
239 // Initialize all nonadaptive values before looping through the frames. | 240 // Initialize all nonadaptive values before looping through the frames. |
| 241 InitInterfAngles(); |
240 InitDelaySumMasks(); | 242 InitDelaySumMasks(); |
241 InitTargetCovMats(); | 243 InitTargetCovMats(); |
242 InitInterfCovMats(); | 244 InitInterfCovMats(); |
243 | 245 |
244 for (size_t i = 0; i < kNumFreqBins; ++i) { | 246 for (size_t i = 0; i < kNumFreqBins; ++i) { |
245 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 247 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
246 rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]); | 248 rpsiws_[i].clear(); |
247 reflected_rpsiws_[i] = | 249 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
248 Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); | 250 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
| 251 } |
249 } | 252 } |
250 } | 253 } |
251 | 254 |
| 255 void NonlinearBeamformer::InitInterfAngles() { |
| 256 // TODO(aluebs): Make kAwayRadians dependent on the mic spacing. |
| 257 const float kAwayRadians = 0.5; |
| 258 |
| 259 interf_angles_radians_.clear(); |
| 260 // TODO(aluebs): When the target angle is settable, make sure the interferer |
| 261 // scenarios aren't reflected over the target one for linear geometries. |
| 262 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); |
| 263 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); |
| 264 } |
| 265 |
252 void NonlinearBeamformer::InitDelaySumMasks() { | 266 void NonlinearBeamformer::InitDelaySumMasks() { |
253 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 267 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
254 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 268 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); |
255 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 269 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, |
256 kFftSize, | 270 kFftSize, |
257 sample_rate_hz_, | 271 sample_rate_hz_, |
258 kSpeedOfSoundMeterSeconds, | 272 kSpeedOfSoundMeterSeconds, |
259 array_geometry_, | 273 array_geometry_, |
260 kTargetAngleRadians, | 274 kTargetAngleRadians, |
261 &delay_sum_masks_[f_ix]); | 275 &delay_sum_masks_[f_ix]); |
262 | 276 |
263 complex_f norm_factor = sqrt( | 277 complex_f norm_factor = sqrt( |
264 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 278 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
265 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 279 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
266 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 280 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
267 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 281 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
268 normalized_delay_sum_masks_[f_ix])); | 282 normalized_delay_sum_masks_[f_ix])); |
269 } | 283 } |
270 } | 284 } |
271 | 285 |
272 void NonlinearBeamformer::InitTargetCovMats() { | 286 void NonlinearBeamformer::InitTargetCovMats() { |
273 for (size_t i = 0; i < kNumFreqBins; ++i) { | 287 for (size_t i = 0; i < kNumFreqBins; ++i) { |
274 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 288 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
275 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 289 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); |
276 complex_f normalization_factor = target_cov_mats_[i].Trace(); | |
277 target_cov_mats_[i].Scale(1.f / normalization_factor); | |
278 } | 290 } |
279 } | 291 } |
280 | 292 |
281 void NonlinearBeamformer::InitInterfCovMats() { | 293 void NonlinearBeamformer::InitInterfCovMats() { |
282 for (size_t i = 0; i < kNumFreqBins; ++i) { | 294 for (size_t i = 0; i < kNumFreqBins; ++i) { |
283 interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | |
284 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | 295 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); |
285 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | |
286 | |
287 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | 296 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], |
288 array_geometry_, | 297 array_geometry_, |
289 &uniform_cov_mat); | 298 &uniform_cov_mat); |
290 | 299 complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; |
291 CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds, | |
292 kInterfAngleRadians, | |
293 i, | |
294 kFftSize, | |
295 kNumFreqBins, | |
296 sample_rate_hz_, | |
297 array_geometry_, | |
298 &angled_cov_mat); | |
299 // Normalize matrices before averaging them. | |
300 complex_f normalization_factor = uniform_cov_mat.Trace(); | |
301 uniform_cov_mat.Scale(1.f / normalization_factor); | 300 uniform_cov_mat.Scale(1.f / normalization_factor); |
302 normalization_factor = angled_cov_mat.Trace(); | |
303 angled_cov_mat.Scale(1.f / normalization_factor); | |
304 | |
305 // Average matrices. | |
306 uniform_cov_mat.Scale(1 - kBalance); | 301 uniform_cov_mat.Scale(1 - kBalance); |
307 angled_cov_mat.Scale(kBalance); | 302 interf_cov_mats_[i].clear(); |
308 interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat); | 303 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
309 reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]); | 304 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, |
| 305 num_input_channels_)); |
| 306 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); |
| 307 CovarianceMatrixGenerator::AngledCovarianceMatrix( |
| 308 kSpeedOfSoundMeterSeconds, |
| 309 interf_angles_radians_[j], |
| 310 i, |
| 311 kFftSize, |
| 312 kNumFreqBins, |
| 313 sample_rate_hz_, |
| 314 array_geometry_, |
| 315 &angled_cov_mat); |
| 316 // Normalize matrices before averaging them. |
| 317 normalization_factor = angled_cov_mat.elements()[0][0]; |
| 318 angled_cov_mat.Scale(1.f / normalization_factor); |
| 319 // Weighted average of matrices. |
| 320 angled_cov_mat.Scale(kBalance); |
| 321 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); |
| 322 } |
310 } | 323 } |
311 } | 324 } |
312 | 325 |
313 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 326 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
314 ChannelBuffer<float>* output) { | 327 ChannelBuffer<float>* output) { |
315 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 328 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
316 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 329 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
317 | 330 |
318 float old_high_pass_mask = high_pass_postfilter_mask_; | 331 float old_high_pass_mask = high_pass_postfilter_mask_; |
319 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 332 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 float rxim = Norm(target_cov_mats_[i], eig_m_); | 382 float rxim = Norm(target_cov_mats_[i], eig_m_); |
370 float ratio_rxiw_rxim = 0.f; | 383 float ratio_rxiw_rxim = 0.f; |
371 if (rxim > 0.f) { | 384 if (rxim > 0.f) { |
372 ratio_rxiw_rxim = rxiws_[i] / rxim; | 385 ratio_rxiw_rxim = rxiws_[i] / rxim; |
373 } | 386 } |
374 | 387 |
375 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); | 388 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); |
376 rmw *= rmw; | 389 rmw *= rmw; |
377 float rmw_r = rmw.real(); | 390 float rmw_r = rmw.real(); |
378 | 391 |
379 new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i], | 392 new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0], |
380 rpsiws_[i], | 393 rpsiws_[i][0], |
381 ratio_rxiw_rxim, | 394 ratio_rxiw_rxim, |
382 rmw_r, | 395 rmw_r); |
383 mask_thresholds_[i]); | 396 for (size_t j = 1; j < interf_angles_radians_.size(); ++j) { |
384 | 397 float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j], |
385 new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i], | 398 rpsiws_[i][j], |
386 reflected_rpsiws_[i], | 399 ratio_rxiw_rxim, |
387 ratio_rxiw_rxim, | 400 rmw_r); |
388 rmw_r, | 401 if (tmp_mask < new_mask_[i]) { |
389 mask_thresholds_[i]); | 402 new_mask_[i] = tmp_mask; |
| 403 } |
| 404 } |
390 } | 405 } |
391 | 406 |
392 ApplyMaskTimeSmoothing(); | 407 ApplyMaskTimeSmoothing(); |
393 EstimateTargetPresence(); | 408 EstimateTargetPresence(); |
394 ApplyLowFrequencyCorrection(); | 409 ApplyLowFrequencyCorrection(); |
395 ApplyHighFrequencyCorrection(); | 410 ApplyHighFrequencyCorrection(); |
396 ApplyMaskFrequencySmoothing(); | 411 ApplyMaskFrequencySmoothing(); |
397 ApplyMasks(input, output); | 412 ApplyMasks(input, output); |
398 } | 413 } |
399 | 414 |
400 float NonlinearBeamformer::CalculatePostfilterMask( | 415 float NonlinearBeamformer::CalculatePostfilterMask( |
401 const ComplexMatrixF& interf_cov_mat, | 416 const ComplexMatrixF& interf_cov_mat, |
402 float rpsiw, | 417 float rpsiw, |
403 float ratio_rxiw_rxim, | 418 float ratio_rxiw_rxim, |
404 float rmw_r, | 419 float rmw_r) { |
405 float mask_threshold) { | |
406 float rpsim = Norm(interf_cov_mat, eig_m_); | 420 float rpsim = Norm(interf_cov_mat, eig_m_); |
407 | 421 |
408 // Find lambda. | |
409 float ratio = 0.f; | 422 float ratio = 0.f; |
410 if (rpsim > 0.f) { | 423 if (rpsim > 0.f) { |
411 ratio = rpsiw / rpsim; | 424 ratio = rpsiw / rpsim; |
412 } | 425 } |
413 float numerator = rmw_r - ratio; | |
414 float denominator = ratio_rxiw_rxim - ratio; | |
415 | 426 |
416 float mask = 1.f; | 427 return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) / |
417 if (denominator > mask_threshold) { | 428 (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim)); |
418 float lambda = numerator / denominator; | |
419 mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum); | |
420 } | |
421 return mask; | |
422 } | 429 } |
423 | 430 |
424 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, | 431 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
425 complex_f* const* output) { | 432 complex_f* const* output) { |
426 complex_f* output_channel = output[0]; | 433 complex_f* output_channel = output[0]; |
427 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 434 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
428 output_channel[f_ix] = complex_f(0.f, 0.f); | 435 output_channel[f_ix] = complex_f(0.f, 0.f); |
429 | 436 |
430 const complex_f* delay_sum_mask_els = | 437 const complex_f* delay_sum_mask_els = |
431 normalized_delay_sum_masks_[f_ix].elements()[0]; | 438 normalized_delay_sum_masks_[f_ix].elements()[0]; |
432 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { | 439 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
433 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; | 440 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
434 } | 441 } |
435 | 442 |
436 output_channel[f_ix] *= final_mask_[f_ix]; | 443 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix]; |
437 } | 444 } |
438 } | 445 } |
439 | 446 |
440 // Smooth new_mask_ into time_smooth_mask_. | 447 // Smooth new_mask_ into time_smooth_mask_. |
441 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { | 448 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { |
442 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { | 449 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |
443 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + | 450 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + |
444 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; | 451 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; |
445 } | 452 } |
446 } | 453 } |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
507 new_mask_ + high_mean_end_bin_ + 1); | 514 new_mask_ + high_mean_end_bin_ + 1); |
508 if (new_mask_[quantile] > kMaskTargetThreshold) { | 515 if (new_mask_[quantile] > kMaskTargetThreshold) { |
509 is_target_present_ = true; | 516 is_target_present_ = true; |
510 interference_blocks_count_ = 0; | 517 interference_blocks_count_ = 0; |
511 } else { | 518 } else { |
512 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 519 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
513 } | 520 } |
514 } | 521 } |
515 | 522 |
516 } // namespace webrtc | 523 } // namespace webrtc |
OLD | NEW |