webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc - Issue 1394103003: Make the nonlinear beamformer steerable

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq

Patch Set: Generalize interferer scenarios Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/beamformer/array_util.h ('K') | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc » ('j') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 11 matching lines...) Expand all Loading...
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h"	22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h"

23	23

24 namespace webrtc {	24 namespace webrtc {

25 namespace {	25 namespace {

26	26

27 // Alpha for the Kaiser Bessel Derived window.	27 // Alpha for the Kaiser Bessel Derived window.

28 const float kKbdAlpha = 1.5f;	28 const float kKbdAlpha = 1.5f;

29	29

30 const float kSpeedOfSoundMeterSeconds = 343;	30 const float kSpeedOfSoundMeterSeconds = 343;

31	31

32 // For both target and interference angles, PI / 2 is perpendicular to the

33 // microphone array, facing forwards. The positive direction goes

34 // counterclockwise.

35 // The angle at which we amplify sound.

36 // TODO(aluebs): Make the target angle dynamically settable.

37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

38

39 // The minimum separation in radians between the target direction and an	32 // The minimum separation in radians between the target direction and an

40 // interferer scenario.	33 // interferer scenario.

41 const float kMinAwayRadians = 0.2f;	34 const float kMinAwayRadians = 0.2f;

42	35

43 // The separation between the target direction and the closest interferer	36 // The separation between the target direction and the closest interferer

44 // scenario is proportional to this constant.	37 // scenario is proportional to this constant.

45 const float kAwaySlope = 0.008f;	38 const float kAwaySlope = 0.008f;

46	39

47 // When calculating the interference covariance matrix, this is the weight for	40 // When calculating the interference covariance matrix, this is the weight for

48 // the weighted average between the uniform covariance matrix and the angled	41 // the weighted average between the uniform covariance matrix and the angled

49 // covariance matrix.	42 // covariance matrix.

50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)	43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)

51 const float kBalance = 0.95f;	44 const float kBalance = 0.95f;

52	45

53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;	46 const float kHalfBeamWidthRadians = DegreesToRadians(20.f);

54	47

55 // Alpha coefficients for mask smoothing.	48 // Alpha coefficients for mask smoothing.

56 const float kMaskTimeSmoothAlpha = 0.2f;	49 const float kMaskTimeSmoothAlpha = 0.2f;

57 const float kMaskFrequencySmoothAlpha = 0.6f;	50 const float kMaskFrequencySmoothAlpha = 0.6f;

58	51

59 // The average mask is computed from masks in this mid-frequency range. If these	52 // The average mask is computed from masks in this mid-frequency range. If these

60 // ranges are changed \|kMaskQuantile\| might need to be adjusted.	53 // ranges are changed \|kMaskQuantile\| might need to be adjusted.

61 const int kLowMeanStartHz = 200;	54 const int kLowMeanStartHz = 200;

62 const int kLowMeanEndHz = 400;	55 const int kLowMeanEndHz = 400;

63	56

(...skipping 114 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
178 center += array_geometry[i].c[dim];	171 center += array_geometry[i].c[dim];

179 }	172 }

180 center /= array_geometry.size();	173 center /= array_geometry.size();

181 for (size_t i = 0; i < array_geometry.size(); ++i) {	174 for (size_t i = 0; i < array_geometry.size(); ++i) {

182 array_geometry[i].c[dim] -= center;	175 array_geometry[i].c[dim] -= center;

183 }	176 }

184 }	177 }

185 return array_geometry;	178 return array_geometry;

186 }	179 }

187	180

	181 Point PairDirection(const Point& a, const Point& b) {

	182 return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()};

	183 }

	184

	185 float DotProduct(const Point& a, const Point& b) {

	186 return a.x() * b.x() + a.y() * b.y() + a.z() * b.z();

	187 }

	188

	189 bool IsGeometryLinear(std::vector<Point> array_geometry) {
	Andrew MacDonald 2015/10/21 03:27:43 DCHECK_GT(array_geometry.size(), 1); DCHECK_GT(array_geometry.size(), 1); aluebs-webrtc 2015/10/27 18:08:15 Done. Show quoted text On 2015/10/21 03:27:43, Andrew MacDonald wrote: > DCHECK_GT(array_geometry.size(), 1); Done.
	190 const float kMinDotProduct = 0.9999f;

	191 const float kMinNorm = 1e-6f;

	192 const Point first_pair_direction =

	193 PairDirection(array_geometry[0], array_geometry[1]);

	194 float norm_a = std::max(

	195 kMinNorm, DotProduct(first_pair_direction, first_pair_direction));

	196 for (size_t i = 2u; i < array_geometry.size(); ++i) {

	197 const Point pair_direction =

	198 PairDirection(array_geometry[i - 1], array_geometry[i]);

	199 float norm_b =
	Andrew MacDonald 2015/10/21 03:27:43 const const aluebs-webrtc 2015/10/27 18:08:15 Done. Show quoted text On 2015/10/21 03:27:43, Andrew MacDonald wrote: > const Done.
	200 std::max(kMinNorm, DotProduct(pair_direction, pair_direction));

	201 if (std::abs(DotProduct(first_pair_direction, pair_direction) /

	202 std::sqrt(norm_a * norm_b)) > kMinDotProduct) {

	203 return false;

	204 }

	205 }

	206 return true;

	207 }

	208

	209 Point AzimuthToPoint(float azimuth) {

	210 return Point(std::cos(azimuth), std::sin(azimuth), 0.f);

	211 }

	212

188 } // namespace	213 } // namespace

189	214

190 // static	215 // static

191 const size_t NonlinearBeamformer::kNumFreqBins;	216 const size_t NonlinearBeamformer::kNumFreqBins;

192	217

193 NonlinearBeamformer::NonlinearBeamformer(	218 NonlinearBeamformer::NonlinearBeamformer(

194 const std::vector<Point>& array_geometry)	219 const std::vector<Point>& array_geometry,

	220 SphericalPointf target_direction)

195 : num_input_channels_(array_geometry.size()),	221 : num_input_channels_(array_geometry.size()),

196 array_geometry_(GetCenteredArray(array_geometry)),	222 array_geometry_(GetCenteredArray(array_geometry)),

197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) {	223 min_mic_spacing_(GetMinimumSpacing(array_geometry)),

	224 target_angle_radians_(target_direction.azimuth()) {

198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);	225 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);

199 }	226 }

200	227

201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {	228 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {

202 chunk_length_ =	229 chunk_length_ =

203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));	230 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));

204 sample_rate_hz_ = sample_rate_hz;	231 sample_rate_hz_ = sample_rate_hz;

205 InitFrequencyCorrectionRanges();

206	232

207 high_pass_postfilter_mask_ = 1.f;	233 high_pass_postfilter_mask_ = 1.f;

208 is_target_present_ = false;	234 is_target_present_ = false;

209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;	235 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;

210 interference_blocks_count_ = hold_target_blocks_;	236 interference_blocks_count_ = hold_target_blocks_;

211	237

212 lapped_transform_.reset(new LappedTransform(num_input_channels_,	238 lapped_transform_.reset(new LappedTransform(num_input_channels_,

213 1,	239 1,

214 chunk_length_,	240 chunk_length_,

215 window_,	241 window_,

216 kFftSize,	242 kFftSize,

217 kFftSize / 2,	243 kFftSize / 2,

218 this));	244 this));

219 for (size_t i = 0; i < kNumFreqBins; ++i) {	245 for (size_t i = 0; i < kNumFreqBins; ++i) {

220 time_smooth_mask_[i] = 1.f;	246 time_smooth_mask_[i] = 1.f;

221 final_mask_[i] = 1.f;	247 final_mask_[i] = 1.f;

222 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;	248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;

223 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;	249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;

224 }	250 }

225	251

226 // Initialize all nonadaptive values before looping through the frames.	252 InitLowFrequencyCorrectionRanges();

227 InitInterfAngles();	253 InitDiffuseCovMats();

228 InitDelaySumMasks();	254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));

229 InitTargetCovMats();

230 InitInterfCovMats();

231

232 for (size_t i = 0; i < kNumFreqBins; ++i) {

233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);

234 rpsiws_[i].clear();

235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));

237 }

238 }

239 }	255 }

240	256

241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() {	257 // These bin indexes determine the regions over which a mean is taken. This is

	258 // applied as a constant value over the adjacent end "frequency correction"

	259 // regions.

	260 //

	261 // low_mean_start_bin_ high_mean_start_bin_

	262 // v v constant

	263 // \|----------------\|--------\|----------------\|-------\|----------------\|

	264 // constant ^ ^

	265 // low_mean_end_bin_ high_mean_end_bin_

	266 //

	267 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {

	268 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);

	269 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);

	270

	271 RTC_DCHECK_GT(low_mean_start_bin_, 0U);

	272 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);

	273 }

	274

	275 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {

242 const float kAliasingFreqHz =	276 const float kAliasingFreqHz =

243 kSpeedOfSoundMeterSeconds /	277 kSpeedOfSoundMeterSeconds /

244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians))));	278 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));

245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,	279 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,

246 sample_rate_hz_ / 2.f);	280 sample_rate_hz_ / 2.f);

247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,	281 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,

248 sample_rate_hz_ / 2.f);	282 sample_rate_hz_ / 2.f);

249

250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);

251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);

252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);	283 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);

253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);	284 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);

254 // These bin indexes determine the regions over which a mean is taken. This	285

255 // is applied as a constant value over the adjacent end "frequency correction"

256 // regions.

257 //

258 // low_mean_start_bin_ high_mean_start_bin_

259 // v v constant

260 // \|----------------\|--------\|----------------\|-------\|----------------\|

261 // constant ^ ^

262 // low_mean_end_bin_ high_mean_end_bin_

263 //

264 RTC_DCHECK_GT(low_mean_start_bin_, 0U);

265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);

266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);	286 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);

267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);	287 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);

268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);	288 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

269 }	289 }

270	290

271

272 void NonlinearBeamformer::InitInterfAngles() {	291 void NonlinearBeamformer::InitInterfAngles() {
	Andrew MacDonald 2015/10/21 03:27:43 This looks good, but it's probably complicated eno This looks good, but it's probably complicated enough to warrant a test. WDYT? aluebs-webrtc 2015/10/27 18:08:15 Good point! :) And the great part about writing un Show quoted text On 2015/10/21 03:27:43, Andrew MacDonald wrote: > This looks good, but it's probably complicated enough to warrant a test. WDYT? Good point! :) And the great part about writing unittest, is that it forces you to think the contract of the method. So I realized that planar arrays can have ambiguity to when the normal is in the xy-plane. Added a bunch of methods to array_util and unit-tested them, modified this method accordingly and finally added a unit-test for it.
273 const float kAwayRadians =	292 const float kAwayRadians =

274 std::min(static_cast<float>(M_PI),	293 std::min(static_cast<float>(M_PI),

275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) /	294 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) /

276 min_mic_spacing_));	295 min_mic_spacing_));

277	296

278 interf_angles_radians_.clear();	297 interf_angles_radians_.clear();

279 // TODO(aluebs): When the target angle is settable, make sure the interferer	298 if (IsGeometryLinear(array_geometry_)) {

280 // scenarios aren't reflected over the target one for linear geometries.	299 const Point array_direction =

281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians);	300 PairDirection(array_geometry_[0], array_geometry_[1]);

282 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians);	301 const Point array_normal(array_direction.y(), -array_direction.x(), 0.f);

	302 const Point target_direction = AzimuthToPoint(target_angle_radians_);

	303 const Point clockwise_interf_direction =

	304 AzimuthToPoint(target_angle_radians_ - kAwayRadians);

	305 if (DotProduct(array_normal, target_direction) *
	Andrew MacDonald 2015/10/21 03:27:43 Add some comments to each of these conditions, bec Add some comments to each of these conditions, because it's harder to see what they mean now. Something like: "The target and clockwise interferer are in the same half-plane defined by the array." ... "Otherwise, the interferer will begin reflecting back at the target. Instead rotate it away 180 degrees." aluebs-webrtc 2015/10/27 18:08:15 Done. Show quoted text On 2015/10/21 03:27:43, Andrew MacDonald wrote: > Add some comments to each of these conditions, because it's harder to see what > they mean now. Something like: > > "The target and clockwise interferer are in the same half-plane defined by the > array." > ... > "Otherwise, the interferer will begin reflecting back at the target. Instead > rotate it away 180 degrees." Done.
	306 DotProduct(array_normal, clockwise_interf_direction) >=

	307 0.f) {

	308 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians);

	309 } else {

	310 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians +

	311 M_PI);

	312 }

	313 const Point counterclock_interf_direction =

	314 AzimuthToPoint(target_angle_radians_ + kAwayRadians);

	315 if (DotProduct(array_normal, target_direction) *

	316 DotProduct(array_normal, counterclock_interf_direction) >=

	317 0.f) {

	318 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians);

	319 } else {

	320 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians -

	321 M_PI);

	322 }

	323 } else {

	324 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians);

	325 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians);

	326 }

283 }	327 }

284	328

285 void NonlinearBeamformer::InitDelaySumMasks() {	329 void NonlinearBeamformer::InitDelaySumMasks() {

286 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {	330 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

287 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);	331 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);

288 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,	332 CovarianceMatrixGenerator::PhaseAlignmentMasks(

289 kFftSize,	333 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,

290 sample_rate_hz_,	334 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);

291 kSpeedOfSoundMeterSeconds,

292 array_geometry_,

293 kTargetAngleRadians,

294 &delay_sum_masks_[f_ix]);

295	335

296 complex_f norm_factor = sqrt(	336 complex_f norm_factor = sqrt(

297 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));	337 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));

298 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);	338 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);

299 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);	339 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);

300 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(	340 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(

301 normalized_delay_sum_masks_[f_ix]));	341 normalized_delay_sum_masks_[f_ix]));

302 }	342 }

303 }	343 }

304	344

305 void NonlinearBeamformer::InitTargetCovMats() {	345 void NonlinearBeamformer::InitTargetCovMats() {

306 for (size_t i = 0; i < kNumFreqBins; ++i) {	346 for (size_t i = 0; i < kNumFreqBins; ++i) {

307 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);	347 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

308 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);	348 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);

309 }	349 }

310 }	350 }

311	351

	352 void NonlinearBeamformer::InitDiffuseCovMats() {

	353 for (size_t i = 0; i < kNumFreqBins; ++i) {

	354 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);

	355 CovarianceMatrixGenerator::UniformCovarianceMatrix(

	356 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);

	357 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];

	358 uniform_cov_mat_[i].Scale(1.f / normalization_factor);

	359 uniform_cov_mat_[i].Scale(1 - kBalance);

	360 }

	361 }

	362

312 void NonlinearBeamformer::InitInterfCovMats() {	363 void NonlinearBeamformer::InitInterfCovMats() {

313 for (size_t i = 0; i < kNumFreqBins; ++i) {	364 for (size_t i = 0; i < kNumFreqBins; ++i) {

314 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);

315 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],

316 array_geometry_,

317 &uniform_cov_mat);

318 complex_f normalization_factor = uniform_cov_mat.elements()[0][0];

319 uniform_cov_mat.Scale(1.f / normalization_factor);

320 uniform_cov_mat.Scale(1 - kBalance);

321 interf_cov_mats_[i].clear();	365 interf_cov_mats_[i].clear();

322 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {	366 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

323 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,	367 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,

324 num_input_channels_));	368 num_input_channels_));

325 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);	369 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

326 CovarianceMatrixGenerator::AngledCovarianceMatrix(	370 CovarianceMatrixGenerator::AngledCovarianceMatrix(

327 kSpeedOfSoundMeterSeconds,	371 kSpeedOfSoundMeterSeconds,

328 interf_angles_radians_[j],	372 interf_angles_radians_[j],

329 i,	373 i,

330 kFftSize,	374 kFftSize,

331 kNumFreqBins,	375 kNumFreqBins,

332 sample_rate_hz_,	376 sample_rate_hz_,

333 array_geometry_,	377 array_geometry_,

334 &angled_cov_mat);	378 &angled_cov_mat);

335 // Normalize matrices before averaging them.	379 // Normalize matrices before averaging them.

336 normalization_factor = angled_cov_mat.elements()[0][0];	380 complex_f normalization_factor = angled_cov_mat.elements()[0][0];

337 angled_cov_mat.Scale(1.f / normalization_factor);	381 angled_cov_mat.Scale(1.f / normalization_factor);

338 // Weighted average of matrices.	382 // Weighted average of matrices.

339 angled_cov_mat.Scale(kBalance);	383 angled_cov_mat.Scale(kBalance);

340 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat);	384 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);

341 }	385 }

342 }	386 }

343 }	387 }

	388

	389 void NonlinearBeamformer::NormalizeCovMats() {

	390 for (size_t i = 0; i < kNumFreqBins; ++i) {

	391 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);

	392 rpsiws_[i].clear();

	393 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

	394 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));

	395 }

	396 }

	397 }

344	398

345 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,	399 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,

346 ChannelBuffer<float>* output) {	400 ChannelBuffer<float>* output) {

347 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);	401 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);

348 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);	402 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);

349	403

350 float old_high_pass_mask = high_pass_postfilter_mask_;	404 float old_high_pass_mask = high_pass_postfilter_mask_;

351 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));	405 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));

352 // Ramp up/down for smoothing. 1 mask per 10ms results in audible	406 // Ramp up/down for smoothing. 1 mask per 10ms results in audible

353 // discontinuities.	407 // discontinuities.

354 const float ramp_increment =	408 const float ramp_increment =

355 (high_pass_postfilter_mask_ - old_high_pass_mask) /	409 (high_pass_postfilter_mask_ - old_high_pass_mask) /

356 input.num_frames_per_band();	410 input.num_frames_per_band();

357 // Apply delay and sum and post-filter in the time domain. WARNING: only works	411 // Apply delay and sum and post-filter in the time domain. WARNING: only works

358 // because delay-and-sum is not frequency dependent.	412 // because delay-and-sum is not frequency dependent.

359 for (size_t i = 1; i < input.num_bands(); ++i) {	413 for (size_t i = 1; i < input.num_bands(); ++i) {

360 float smoothed_mask = old_high_pass_mask;	414 float smoothed_mask = old_high_pass_mask;

361 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {	415 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {

362 smoothed_mask += ramp_increment;	416 smoothed_mask += ramp_increment;

363	417

364 // Applying the delay and sum (at zero degrees, this is equivalent to	418 // Applying the delay and sum (at zero degrees, this is equivalent to

365 // averaging).	419 // averaging).
	Andrew MacDonald 2015/10/22 01:43:32 CRITICAL ;-) Just noticed this because I'm lookin CRITICAL ;-) Just noticed this because I'm looking into the code for avoiding the multi-channel splitting filter: you need to steer the delay-and-sum beam now. Adding fractional delay filters is going to be pretty annoying for what is a largely negligible effect with two mics. Do you just want to do integer delays? Andrew MacDonald 2015/10/22 01:48:55 I think the Matlab code does the delay-and-sum fil Show quoted text On 2015/10/22 01:43:32, Andrew MacDonald wrote: > CRITICAL ;-) > > Just noticed this because I'm looking into the code for avoiding the > multi-channel splitting filter: you need to steer the delay-and-sum beam now. > > Adding fractional delay filters is going to be pretty annoying for what is a > largely negligible effect with two mics. Do you just want to do integer delays? I think the Matlab code does the delay-and-sum filtering in the frequency domain? Perhaps we should follow that here. Andrew MacDonald 2015/10/22 01:53:03 Ah, that doesn't make any sense. This is for the u Show quoted text On 2015/10/22 01:48:55, Andrew MacDonald wrote: > On 2015/10/22 01:43:32, Andrew MacDonald wrote: > > CRITICAL ;-) > > > > Just noticed this because I'm looking into the code for avoiding the > > multi-channel splitting filter: you need to steer the delay-and-sum beam now. > > > > Adding fractional delay filters is going to be pretty annoying for what is a > > largely negligible effect with two mics. Do you just want to do integer > delays? > > I think the Matlab code does the delay-and-sum filtering in the frequency > domain? Perhaps we should follow that here. Ah, that doesn't make any sense. This is for the upper bands where we don't have a FD representation. Hmm. I'm looking at providing the full-band signal to avoid the multi-channel split though, so in the new typical case, it will work fine to do the delay-and-sum in FD. aluebs-webrtc 2015/10/27 18:08:15 As you said, this is only for the higher bands, so Show quoted text On 2015/10/22 01:53:03, Andrew MacDonald wrote: > On 2015/10/22 01:48:55, Andrew MacDonald wrote: > > On 2015/10/22 01:43:32, Andrew MacDonald wrote: > > > CRITICAL ;-) > > > > > > Just noticed this because I'm looking into the code for avoiding the > > > multi-channel splitting filter: you need to steer the delay-and-sum beam > now. > > > > > > Adding fractional delay filters is going to be pretty annoying for what is a > > > largely negligible effect with two mics. Do you just want to do integer > > delays? > > > > I think the Matlab code does the delay-and-sum filtering in the frequency > > domain? Perhaps we should follow that here. > > Ah, that doesn't make any sense. This is for the upper bands where we don't have > a FD representation. Hmm. > > I'm looking at providing the full-band signal to avoid the multi-channel split > though, so in the new typical case, it will work fine to do the delay-and-sum in > FD. As you said, this is only for the higher bands, so it will not be applicable when the full-band signal is used and we don't want to transform to the FD. I find even the time domain integer delay to be pretty annoying for a largely negligible effect, specially since we would need to take care of the cases when this delay changes dynamically. So I propose to use the first channel for the higher bands and apply the post-filter mask. WDYT? Andrew MacDonald 2015/10/28 01:57:56 This looks good, assuming the effect is as negligi Show quoted text On 2015/10/27 18:08:15, aluebs-webrtc wrote: > As you said, this is only for the higher bands, so it will not be applicable > when the full-band signal is used and we don't want to transform to the FD. I > find even the time domain integer delay to be pretty annoying for a largely > negligible effect, specially since we would need to take care of the cases when > this delay changes dynamically. So I propose to use the first channel for the > higher bands and apply the post-filter mask. WDYT? This looks good, assuming the effect is as negligible as we've been assuming. Did you confirm with a recording? Since we're doing this, could you also drop a TODO here to look into allowing multiple output channels? We could apply the mask to every channel. aluebs-webrtc 2015/10/29 00:34:21 I did confirm that I couldn't hear the difference. Show quoted text On 2015/10/28 01:57:56, Andrew MacDonald wrote: > On 2015/10/27 18:08:15, aluebs-webrtc wrote: > > As you said, this is only for the higher bands, so it will not be applicable > > when the full-band signal is used and we don't want to transform to the FD. I > > find even the time domain integer delay to be pretty annoying for a largely > > negligible effect, specially since we would need to take care of the cases > when > > this delay changes dynamically. So I propose to use the first channel for the > > higher bands and apply the post-filter mask. WDYT? > > This looks good, assuming the effect is as negligible as we've been assuming. > Did you confirm with a recording? > > Since we're doing this, could you also drop a TODO here to look into allowing > multiple output channels? We could apply the mask to every channel. I did confirm that I couldn't hear the difference. Now I checked in matlab to double confirm. For most blocks the SNR (between the signal and the error from not using the D&S for higher bands) is between 20dB and 35dB.
366 float sum = 0.f;	420 float sum = 0.f;

367 for (int k = 0; k < input.num_channels(); ++k) {	421 for (int k = 0; k < input.num_channels(); ++k) {

368 sum += input.channels(i)[k][j];	422 sum += input.channels(i)[k][j];

369 }	423 }

370 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;	424 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;

371 }	425 }

372 }	426 }

373 }	427 }

374	428

	429 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {

	430 target_angle_radians_ = target_direction.azimuth();

	431 InitHighFrequencyCorrectionRanges();

	432 InitInterfAngles();

	433 InitDelaySumMasks();

	434 InitTargetCovMats();

	435 InitInterfCovMats();

	436 NormalizeCovMats();

	437 }

	438

375 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {	439 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {

376 // If more than half-beamwidth degrees away from the beam's center,	440 // If more than half-beamwidth degrees away from the beam's center,

377 // you are out of the beam.	441 // you are out of the beam.

378 return fabs(spherical_point.azimuth() - kTargetAngleRadians) <	442 return fabs(spherical_point.azimuth() - target_angle_radians_) <

379 kHalfBeamWidthRadians;	443 kHalfBeamWidthRadians;

380 }	444 }

381	445

382 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,	446 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,

383 int num_input_channels,	447 int num_input_channels,

384 size_t num_freq_bins,	448 size_t num_freq_bins,

385 int num_output_channels,	449 int num_output_channels,

386 complex_f* const* output) {	450 complex_f* const* output) {

387 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);	451 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);

388 RTC_CHECK_EQ(num_input_channels, num_input_channels_);	452 RTC_CHECK_EQ(num_input_channels, num_input_channels_);

(...skipping 144 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
533 new_mask_ + high_mean_end_bin_ + 1);	597 new_mask_ + high_mean_end_bin_ + 1);

534 if (new_mask_[quantile] > kMaskTargetThreshold) {	598 if (new_mask_[quantile] > kMaskTargetThreshold) {

535 is_target_present_ = true;	599 is_target_present_ = true;

536 interference_blocks_count_ = 0;	600 interference_blocks_count_ = 0;

537 } else {	601 } else {

538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;	602 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;

539 }	603 }

540 }	604 }

541	605

542 } // namespace webrtc	606 } // namespace webrtc

OLD	NEW