webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc - Issue 1378973003: Implement new version of the NonlinearBeamformer

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1378973003: Implement new version of the NonlinearBeamformer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #define _USE_MATH_DEFINES	11 #define _USE_MATH_DEFINES

12	12

13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"	13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

14	14

15 #include <algorithm>	15 #include <algorithm>

16 #include <cmath>	16 #include <cmath>

17 #include <numeric>	17 #include <numeric>

18 #include <vector>	18 #include <vector>

19	19

20 #include "webrtc/base/arraysize.h"	20 #include "webrtc/base/arraysize.h"

21 #include "webrtc/common_audio/window_generator.h"	21 #include "webrtc/common_audio/window_generator.h"

22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"	22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

23	23

24 namespace webrtc {	24 namespace webrtc {

25 namespace {	25 namespace {

26	26

27 // Alpha for the Kaiser Bessel Derived window.	27 // Alpha for the Kaiser Bessel Derived window.

28 const float kKbdAlpha = 1.5f;	28 const float kKbdAlpha = 1.5f;

29	29

30 // The minimum value a post-processing mask can take.

31 const float kMaskMinimum = 0.01f;

32

33 const float kSpeedOfSoundMeterSeconds = 343;	30 const float kSpeedOfSoundMeterSeconds = 343;

34	31

35 // For both target and interference angles, PI / 2 is perpendicular to the	32 // For both target and interference angles, PI / 2 is perpendicular to the

36 // microphone array, facing forwards. The positive direction goes	33 // microphone array, facing forwards. The positive direction goes

37 // counterclockwise.	34 // counterclockwise.

38 // The angle at which we amplify sound.	35 // The angle at which we amplify sound.

	36 // TODO(aluebs): Make the target angle dynamically settable.

39 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;	37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

40	38

41 // The angle at which we suppress sound. Suppression is symmetric around PI / 2

42 // radians, so sound is suppressed at both +\|kInterfAngleRadians\| and

43 // PI - \|kInterfAngleRadians\|. Since the beamformer is robust, this should

44 // suppress sound coming from close angles as well.

45 const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;

46

47 // When calculating the interference covariance matrix, this is the weight for	39 // When calculating the interference covariance matrix, this is the weight for

48 // the weighted average between the uniform covariance matrix and the angled	40 // the weighted average between the uniform covariance matrix and the angled

49 // covariance matrix.	41 // covariance matrix.

50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)	42 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)

51 const float kBalance = 0.4f;	43 const float kBalance = 0.95f;

52	44

53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;	45 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;

54	46

55 // TODO(claguna): need comment here.

56 const float kBeamwidthConstant = 0.00002f;

57

58 // Alpha coefficients for mask smoothing.	47 // Alpha coefficients for mask smoothing.

59 const float kMaskTimeSmoothAlpha = 0.2f;	48 const float kMaskTimeSmoothAlpha = 0.2f;

60 const float kMaskFrequencySmoothAlpha = 0.6f;	49 const float kMaskFrequencySmoothAlpha = 0.6f;

61	50

62 // The average mask is computed from masks in this mid-frequency range. If these	51 // The average mask is computed from masks in this mid-frequency range. If these

63 // ranges are changed \|kMaskQuantile\| might need to be adjusted.	52 // ranges are changed \|kMaskQuantile\| might need to be adjusted.

64 const int kLowMeanStartHz = 200;	53 const int kLowMeanStartHz = 200;

65 const int kLowMeanEndHz = 400;	54 const int kLowMeanEndHz = 400;

66	55

	56 // TODO(aluebs): Make the high frequency correction range depend on the target

	57 // angle.

67 const int kHighMeanStartHz = 3000;	58 const int kHighMeanStartHz = 3000;

68 const int kHighMeanEndHz = 5000;	59 const int kHighMeanEndHz = 5000;

69	60

	61 // To handle the scenario mismatch.
	Andrew MacDonald 2015/10/06 23:54:31: Can you expand this comment? Not sure what this means. aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > Can you expand this comment? Not sure what this means. Done.
	62 const float kCutOffConstant = 0.9999;

	63

70 // Quantile of mask values which is used to estimate target presence.	64 // Quantile of mask values which is used to estimate target presence.

71 const float kMaskQuantile = 0.7f;	65 const float kMaskQuantile = 0.7f;

72 // Mask threshold over which the data is considered signal and not interference.	66 // Mask threshold over which the data is considered signal and not interference.

73 const float kMaskTargetThreshold = 0.3f;	67 const float kMaskTargetThreshold = 0.01f;
	Andrew MacDonald 2015/10/06 23:54:31: We should probably have a way to tune this automatically. We could hand annotate a few files with ground truth target and interference, and then search for the optimal mask threshold. This will be a problem whenever we make significant changes here, and I fear forgetting to update it. aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > We should probably have a way to tune this automatically. We could hand annotate > a few files with ground truth target and interference, and then search for the > optimal mask threshold. This will be a problem whenever we make significant > changes here, and I fear forgetting to update it. I added a comment that it has to be updated every time the postfilter calculation is changed significantly. And also added a TODO to write a tool to tune the target threshold automatically based on files annotated with target and interference ground truth. But leaving that for another CL. Andrew MacDonald 2015/10/13 21:55:16: On 2015/10/07 22:08:05, aluebs-webrtc wrote: > On 2015/10/06 23:54:31, Andrew MacDonald wrote: > > We should probably have a way to tune this automatically. We could hand > annotate > > a few files with ground truth target and interference, and then search for the > > optimal mask threshold. This will be a problem whenever we make significant > > changes here, and I fear forgetting to update it. > > I added a comment that it has to be updated every time the postfilter calculation is > changed significantly. And also added a TODO to write a tool to tune the target > threshold automatically based on files annotated with target and interference > ground truth. But leaving that for another CL. Yep, sg.
74 // Time in seconds after which the data is considered interference if the mask	68 // Time in seconds after which the data is considered interference if the mask

75 // does not pass \|kMaskTargetThreshold\|.	69 // does not pass \|kMaskTargetThreshold\|.

76 const float kHoldTargetSeconds = 0.25f;	70 const float kHoldTargetSeconds = 0.25f;

77	71

78 // Does conjugate(\|norm_mat\|) * \|mat\| * transpose(\|norm_mat\|). No extra space is	72 // Does conjugate(\|norm_mat\|) * \|mat\| * transpose(\|norm_mat\|). No extra space is

79 // used; to accomplish this, we compute both multiplications in the same loop.	73 // used; to accomplish this, we compute both multiplications in the same loop.

80 // The returned norm is clamped to be non-negative.	74 // The returned norm is clamped to be non-negative.

81 float Norm(const ComplexMatrix<float>& mat,	75 float Norm(const ComplexMatrix<float>& mat,

82 const ComplexMatrix<float>& norm_mat) {	76 const ComplexMatrix<float>& norm_mat) {

83 RTC_CHECK_EQ(norm_mat.num_rows(), 1);	77 RTC_CHECK_EQ(norm_mat.num_rows(), 1);

(...skipping 127 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
211 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);	205 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);

212 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);	206 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);

213 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);	207 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);

214 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);	208 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

215	209

216 high_pass_postfilter_mask_ = 1.f;	210 high_pass_postfilter_mask_ = 1.f;

217 is_target_present_ = false;	211 is_target_present_ = false;

218 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;	212 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;

219 interference_blocks_count_ = hold_target_blocks_;	213 interference_blocks_count_ = hold_target_blocks_;

220	214

221

222 lapped_transform_.reset(new LappedTransform(num_input_channels_,	215 lapped_transform_.reset(new LappedTransform(num_input_channels_,

223 1,	216 1,

224 chunk_length_,	217 chunk_length_,

225 window_,	218 window_,

226 kFftSize,	219 kFftSize,

227 kFftSize / 2,	220 kFftSize / 2,

228 this));	221 this));

229 for (size_t i = 0; i < kNumFreqBins; ++i) {	222 for (size_t i = 0; i < kNumFreqBins; ++i) {

230 time_smooth_mask_[i] = 1.f;	223 time_smooth_mask_[i] = 1.f;

231 final_mask_[i] = 1.f;	224 final_mask_[i] = 1.f;

232 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;	225 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;

233 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;	226 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;

234 mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *

235 kBeamwidthConstant * wave_numbers_[i] *

236 wave_numbers_[i];

237 }	227 }

238	228

239 // Initialize all nonadaptive values before looping through the frames.	229 // Initialize all nonadaptive values before looping through the frames.

	230 InitInterfAngles();

240 InitDelaySumMasks();	231 InitDelaySumMasks();

241 InitTargetCovMats();	232 InitTargetCovMats();

242 InitInterfCovMats();	233 InitInterfCovMats();

243	234

244 for (size_t i = 0; i < kNumFreqBins; ++i) {	235 for (size_t i = 0; i < kNumFreqBins; ++i) {

245 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);	236 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);

246 rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);	237 rpsiws_[i].clear();

247 reflected_rpsiws_[i] =	238 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

248 Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);	239 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
	Andrew MacDonald 2015/10/06 23:54:31: Is this Norm different from the one you've added in this CL? Can they be consolidated? aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > Is this Norm different from the one you've added in this CL? Can they be > consolidated? Yes, it is the Norm defined in the paper and does conjugate(\|norm_mat\|) * \|mat\| * transpose(\|norm_mat\|).
	240 }

249 }	241 }

250 }	242 }

251	243

	244 void NonlinearBeamformer::InitInterfAngles() {

	245 // TODO(aluebs): Make kAway dependent on the mic spacing.

	246 const float kAway = 0.25;

	247

	248 interf_angles_radians_.clear();

	249 // TODO(aluebs): When the target angle is settable, make sure the interferer

	250 // scenarios aren't reflected over the target one for linear geometries.

	251 interf_angles_radians_.push_back(kTargetAngleRadians - kAway);

	252 interf_angles_radians_.push_back(kTargetAngleRadians + kAway);
	Andrew MacDonald 2015/10/06 23:54:31: This is all known at compile time, but I suppose your thought is that it won't be once the target angle is settable, right? aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > This is all known at compile time, but I suppose your thought is that it won't > be once the target angle is settable, right? Exactly. In a followup CL I will make the target scenario settable, and then this will depend on that.
	253 }

	254

252 void NonlinearBeamformer::InitDelaySumMasks() {	255 void NonlinearBeamformer::InitDelaySumMasks() {

253 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {	256 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

254 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);	257 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);

255 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,	258 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,

256 kFftSize,	259 kFftSize,

257 sample_rate_hz_,	260 sample_rate_hz_,

258 kSpeedOfSoundMeterSeconds,	261 kSpeedOfSoundMeterSeconds,

259 array_geometry_,	262 array_geometry_,

260 kTargetAngleRadians,	263 kTargetAngleRadians,

261 &delay_sum_masks_[f_ix]);	264 &delay_sum_masks_[f_ix]);

262	265

263 complex_f norm_factor = sqrt(	266 complex_f norm_factor = sqrt(

264 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));	267 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));

265 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);	268 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);

266 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);	269 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);

267 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(	270 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(

268 normalized_delay_sum_masks_[f_ix]));	271 normalized_delay_sum_masks_[f_ix]));

269 }	272 }

270 }	273 }

271	274

272 void NonlinearBeamformer::InitTargetCovMats() {	275 void NonlinearBeamformer::InitTargetCovMats() {

273 for (size_t i = 0; i < kNumFreqBins; ++i) {	276 for (size_t i = 0; i < kNumFreqBins; ++i) {

274 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);	277 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

275 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);	278 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);

276 complex_f normalization_factor = target_cov_mats_[i].Trace();

277 target_cov_mats_[i].Scale(1.f / normalization_factor);

278 }	279 }

279 }	280 }

280	281

281 void NonlinearBeamformer::InitInterfCovMats() {	282 void NonlinearBeamformer::InitInterfCovMats() {

282 for (size_t i = 0; i < kNumFreqBins; ++i) {	283 for (size_t i = 0; i < kNumFreqBins; ++i) {

283 interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

284 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);	284 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);

285 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

286

287 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],	285 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],

288 array_geometry_,	286 array_geometry_,

289 &uniform_cov_mat);	287 &uniform_cov_mat);

290	288 complex_f normalization_factor = uniform_cov_mat.elements()[0][0];

291 CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,

292 kInterfAngleRadians,

293 i,

294 kFftSize,

295 kNumFreqBins,

296 sample_rate_hz_,

297 array_geometry_,

298 &angled_cov_mat);

299 // Normalize matrices before averaging them.

300 complex_f normalization_factor = uniform_cov_mat.Trace();

301 uniform_cov_mat.Scale(1.f / normalization_factor);	289 uniform_cov_mat.Scale(1.f / normalization_factor);

302 normalization_factor = angled_cov_mat.Trace();

303 angled_cov_mat.Scale(1.f / normalization_factor);

304

305 // Average matrices.

306 uniform_cov_mat.Scale(1 - kBalance);	290 uniform_cov_mat.Scale(1 - kBalance);

307 angled_cov_mat.Scale(kBalance);	291 interf_cov_mats_[i].clear();

308 interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);	292 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

309 reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);	293 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,

	294 num_input_channels_));

	295 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

	296 CovarianceMatrixGenerator::AngledCovarianceMatrix(

	297 kSpeedOfSoundMeterSeconds,

	298 interf_angles_radians_[j],

	299 i,

	300 kFftSize,

	301 kNumFreqBins,

	302 sample_rate_hz_,

	303 array_geometry_,

	304 &angled_cov_mat);

	305 // Normalize matrices before averaging them.

	306 normalization_factor = angled_cov_mat.elements()[0][0];

	307 angled_cov_mat.Scale(1.f / normalization_factor);

	308 // Average matrices.
	Andrew MacDonald 2015/10/06 23:54:31: Perhaps say "Weighted average of matrices." aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > Perhaps say "Weighted average of matrices." Done.
	309 angled_cov_mat.Scale(kBalance);

	310 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat);

	311 }

310 }	312 }

311 }	313 }

312	314

313 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,	315 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,

314 ChannelBuffer<float>* output) {	316 ChannelBuffer<float>* output) {

315 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);	317 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);

316 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);	318 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);

317	319

318 float old_high_pass_mask = high_pass_postfilter_mask_;	320 float old_high_pass_mask = high_pass_postfilter_mask_;

319 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));	321 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
369 float rxim = Norm(target_cov_mats_[i], eig_m_);	371 float rxim = Norm(target_cov_mats_[i], eig_m_);

370 float ratio_rxiw_rxim = 0.f;	372 float ratio_rxiw_rxim = 0.f;

371 if (rxim > 0.f) {	373 if (rxim > 0.f) {

372 ratio_rxiw_rxim = rxiws_[i] / rxim;	374 ratio_rxiw_rxim = rxiws_[i] / rxim;

373 }	375 }

374	376

375 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));	377 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));

376 rmw *= rmw;	378 rmw *= rmw;

377 float rmw_r = rmw.real();	379 float rmw_r = rmw.real();

378	380

379 new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],	381 new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],

380 rpsiws_[i],	382 rpsiws_[i][0],

381 ratio_rxiw_rxim,	383 ratio_rxiw_rxim,

382 rmw_r,	384 rmw_r);

383 mask_thresholds_[i]);	385 for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {

384	386 float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],

385 new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],	387 rpsiws_[i][j],

386 reflected_rpsiws_[i],	388 ratio_rxiw_rxim,

387 ratio_rxiw_rxim,	389 rmw_r);

388 rmw_r,	390 if (tmp_mask < new_mask_[i]) {

389 mask_thresholds_[i]);	391 new_mask_[i] = tmp_mask;

	392 }

	393 }

390 }	394 }

391	395

392 ApplyMaskTimeSmoothing();	396 ApplyMaskTimeSmoothing();

393 EstimateTargetPresence();	397 EstimateTargetPresence();

394 ApplyLowFrequencyCorrection();	398 ApplyLowFrequencyCorrection();

395 ApplyHighFrequencyCorrection();	399 ApplyHighFrequencyCorrection();

396 ApplyMaskFrequencySmoothing();	400 ApplyMaskFrequencySmoothing();

397 ApplyMasks(input, output);	401 ApplyMasks(input, output);

398 }	402 }

399	403

400 float NonlinearBeamformer::CalculatePostfilterMask(	404 float NonlinearBeamformer::CalculatePostfilterMask(

401 const ComplexMatrixF& interf_cov_mat,	405 const ComplexMatrixF& interf_cov_mat,

402 float rpsiw,	406 float rpsiw,

403 float ratio_rxiw_rxim,	407 float ratio_rxiw_rxim,

404 float rmw_r,	408 float rmw_r) {

405 float mask_threshold) {

406 float rpsim = Norm(interf_cov_mat, eig_m_);	409 float rpsim = Norm(interf_cov_mat, eig_m_);

407	410

408 // Find lambda.

409 float ratio = 0.f;	411 float ratio = 0.f;

410 if (rpsim > 0.f) {	412 if (rpsim > 0.f) {

411 ratio = rpsiw / rpsim;	413 ratio = rpsiw / rpsim;

412 }	414 }

413 float numerator = rmw_r - ratio;

414 float denominator = ratio_rxiw_rxim - ratio;

415	415

416 float mask = 1.f;	416 return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /

417 if (denominator > mask_threshold) {	417 (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
Andrew MacDonald 2015/10/06 23:54:31: Why don't we need the mask thresholds any longer? aluebs-webrtc 2015/10/07 22:08:05: On 2015/10/06 23:54:31, Andrew MacDonald wrote: > Why don't we need the mask thresholds any longer? I am not sure what you mean. The expression of the postfilter is completely different and so are the heuristics around it. If your fear is to have a small denominator, the minimum is going to be (1 - kCutOffConstant) == 0.0001.
418 float lambda = numerator / denominator;

419 mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);

420 }

421 return mask;

422 }	418 }

423	419

424 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,	420 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,

425 complex_f* const* output) {	421 complex_f* const* output) {

426 complex_f* output_channel = output[0];	422 complex_f* output_channel = output[0];

427 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {	423 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

428 output_channel[f_ix] = complex_f(0.f, 0.f);	424 output_channel[f_ix] = complex_f(0.f, 0.f);

429	425

430 const complex_f* delay_sum_mask_els =	426 const complex_f* delay_sum_mask_els =

431 normalized_delay_sum_masks_[f_ix].elements()[0];	427 normalized_delay_sum_masks_[f_ix].elements()[0];

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
507 new_mask_ + high_mean_end_bin_ + 1);	503 new_mask_ + high_mean_end_bin_ + 1);

508 if (new_mask_[quantile] > kMaskTargetThreshold) {	504 if (new_mask_[quantile] > kMaskTargetThreshold) {

509 is_target_present_ = true;	505 is_target_present_ = true;

510 interference_blocks_count_ = 0;	506 interference_blocks_count_ = 0;

511 } else {	507 } else {

512 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;	508 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;

513 }	509 }

514 }	510 }

515	511

516 } // namespace webrtc	512 } // namespace webrtc

OLD	NEW