Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq
Patch Set: Generalize interferer scenarios Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h"
23 23
24 namespace webrtc { 24 namespace webrtc {
25 namespace { 25 namespace {
26 26
27 // Alpha for the Kaiser Bessel Derived window. 27 // Alpha for the Kaiser Bessel Derived window.
28 const float kKbdAlpha = 1.5f; 28 const float kKbdAlpha = 1.5f;
29 29
30 const float kSpeedOfSoundMeterSeconds = 343; 30 const float kSpeedOfSoundMeterSeconds = 343;
31 31
32 // For both target and interference angles, PI / 2 is perpendicular to the
33 // microphone array, facing forwards. The positive direction goes
34 // counterclockwise.
35 // The angle at which we amplify sound.
36 // TODO(aluebs): Make the target angle dynamically settable.
37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
38
39 // The minimum separation in radians between the target direction and an 32 // The minimum separation in radians between the target direction and an
40 // interferer scenario. 33 // interferer scenario.
41 const float kMinAwayRadians = 0.2f; 34 const float kMinAwayRadians = 0.2f;
42 35
43 // The separation between the target direction and the closest interferer 36 // The separation between the target direction and the closest interferer
44 // scenario is proportional to this constant. 37 // scenario is proportional to this constant.
45 const float kAwaySlope = 0.008f; 38 const float kAwaySlope = 0.008f;
46 39
47 // When calculating the interference covariance matrix, this is the weight for 40 // When calculating the interference covariance matrix, this is the weight for
48 // the weighted average between the uniform covariance matrix and the angled 41 // the weighted average between the uniform covariance matrix and the angled
49 // covariance matrix. 42 // covariance matrix.
50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) 43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
51 const float kBalance = 0.95f; 44 const float kBalance = 0.95f;
52 45
53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; 46 const float kHalfBeamWidthRadians = DegreesToRadians(20.f);
54 47
55 // Alpha coefficients for mask smoothing. 48 // Alpha coefficients for mask smoothing.
56 const float kMaskTimeSmoothAlpha = 0.2f; 49 const float kMaskTimeSmoothAlpha = 0.2f;
57 const float kMaskFrequencySmoothAlpha = 0.6f; 50 const float kMaskFrequencySmoothAlpha = 0.6f;
58 51
59 // The average mask is computed from masks in this mid-frequency range. If these 52 // The average mask is computed from masks in this mid-frequency range. If these
60 // ranges are changed |kMaskQuantile| might need to be adjusted. 53 // ranges are changed |kMaskQuantile| might need to be adjusted.
61 const int kLowMeanStartHz = 200; 54 const int kLowMeanStartHz = 200;
62 const int kLowMeanEndHz = 400; 55 const int kLowMeanEndHz = 400;
63 56
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 center += array_geometry[i].c[dim]; 171 center += array_geometry[i].c[dim];
179 } 172 }
180 center /= array_geometry.size(); 173 center /= array_geometry.size();
181 for (size_t i = 0; i < array_geometry.size(); ++i) { 174 for (size_t i = 0; i < array_geometry.size(); ++i) {
182 array_geometry[i].c[dim] -= center; 175 array_geometry[i].c[dim] -= center;
183 } 176 }
184 } 177 }
185 return array_geometry; 178 return array_geometry;
186 } 179 }
187 180
181 Point PairDirection(const Point& a, const Point& b) {
182 return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()};
183 }
184
185 float DotProduct(const Point& a, const Point& b) {
186 return a.x() * b.x() + a.y() * b.y() + a.z() * b.z();
187 }
188
189 bool IsGeometryLinear(std::vector<Point> array_geometry) {
Andrew MacDonald 2015/10/21 03:27:43 DCHECK_GT(array_geometry.size(), 1);
aluebs-webrtc 2015/10/27 18:08:15 Done.
190 const float kMinDotProduct = 0.9999f;
191 const float kMinNorm = 1e-6f;
192 const Point first_pair_direction =
193 PairDirection(array_geometry[0], array_geometry[1]);
194 float norm_a = std::max(
195 kMinNorm, DotProduct(first_pair_direction, first_pair_direction));
196 for (size_t i = 2u; i < array_geometry.size(); ++i) {
197 const Point pair_direction =
198 PairDirection(array_geometry[i - 1], array_geometry[i]);
199 float norm_b =
Andrew MacDonald 2015/10/21 03:27:43 const
aluebs-webrtc 2015/10/27 18:08:15 Done.
200 std::max(kMinNorm, DotProduct(pair_direction, pair_direction));
201 if (std::abs(DotProduct(first_pair_direction, pair_direction) /
202 std::sqrt(norm_a * norm_b)) > kMinDotProduct) {
203 return false;
204 }
205 }
206 return true;
207 }
208
209 Point AzimuthToPoint(float azimuth) {
210 return Point(std::cos(azimuth), std::sin(azimuth), 0.f);
211 }
212
188 } // namespace 213 } // namespace
189 214
190 // static 215 // static
191 const size_t NonlinearBeamformer::kNumFreqBins; 216 const size_t NonlinearBeamformer::kNumFreqBins;
192 217
193 NonlinearBeamformer::NonlinearBeamformer( 218 NonlinearBeamformer::NonlinearBeamformer(
194 const std::vector<Point>& array_geometry) 219 const std::vector<Point>& array_geometry,
220 SphericalPointf target_direction)
195 : num_input_channels_(array_geometry.size()), 221 : num_input_channels_(array_geometry.size()),
196 array_geometry_(GetCenteredArray(array_geometry)), 222 array_geometry_(GetCenteredArray(array_geometry)),
197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { 223 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
224 target_angle_radians_(target_direction.azimuth()) {
198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); 225 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
199 } 226 }
200 227
201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { 228 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
202 chunk_length_ = 229 chunk_length_ =
203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); 230 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
204 sample_rate_hz_ = sample_rate_hz; 231 sample_rate_hz_ = sample_rate_hz;
205 InitFrequencyCorrectionRanges();
206 232
207 high_pass_postfilter_mask_ = 1.f; 233 high_pass_postfilter_mask_ = 1.f;
208 is_target_present_ = false; 234 is_target_present_ = false;
209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; 235 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
210 interference_blocks_count_ = hold_target_blocks_; 236 interference_blocks_count_ = hold_target_blocks_;
211 237
212 lapped_transform_.reset(new LappedTransform(num_input_channels_, 238 lapped_transform_.reset(new LappedTransform(num_input_channels_,
213 1, 239 1,
214 chunk_length_, 240 chunk_length_,
215 window_, 241 window_,
216 kFftSize, 242 kFftSize,
217 kFftSize / 2, 243 kFftSize / 2,
218 this)); 244 this));
219 for (size_t i = 0; i < kNumFreqBins; ++i) { 245 for (size_t i = 0; i < kNumFreqBins; ++i) {
220 time_smooth_mask_[i] = 1.f; 246 time_smooth_mask_[i] = 1.f;
221 final_mask_[i] = 1.f; 247 final_mask_[i] = 1.f;
222 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; 248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
223 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; 249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
224 } 250 }
225 251
226 // Initialize all nonadaptive values before looping through the frames. 252 InitLowFrequencyCorrectionRanges();
227 InitInterfAngles(); 253 InitDiffuseCovMats();
228 InitDelaySumMasks(); 254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
229 InitTargetCovMats();
230 InitInterfCovMats();
231
232 for (size_t i = 0; i < kNumFreqBins; ++i) {
233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
234 rpsiws_[i].clear();
235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
237 }
238 }
239 } 255 }
240 256
241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() { 257 // These bin indexes determine the regions over which a mean is taken. This is
258 // applied as a constant value over the adjacent end "frequency correction"
259 // regions.
260 //
261 // low_mean_start_bin_ high_mean_start_bin_
262 // v v constant
263 // |----------------|--------|----------------|-------|----------------|
264 // constant ^ ^
265 // low_mean_end_bin_ high_mean_end_bin_
266 //
267 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
268 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
269 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
270
271 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
272 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
273 }
274
275 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
242 const float kAliasingFreqHz = 276 const float kAliasingFreqHz =
243 kSpeedOfSoundMeterSeconds / 277 kSpeedOfSoundMeterSeconds /
244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); 278 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, 279 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
246 sample_rate_hz_ / 2.f); 280 sample_rate_hz_ / 2.f);
247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, 281 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
248 sample_rate_hz_ / 2.f); 282 sample_rate_hz_ / 2.f);
249
250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); 283 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); 284 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
254 // These bin indexes determine the regions over which a mean is taken. This 285
255 // is applied as a constant value over the adjacent end "frequency correction"
256 // regions.
257 //
258 // low_mean_start_bin_ high_mean_start_bin_
259 // v v constant
260 // |----------------|--------|----------------|-------|----------------|
261 // constant ^ ^
262 // low_mean_end_bin_ high_mean_end_bin_
263 //
264 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); 286 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); 287 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); 288 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
269 } 289 }
270 290
271
272 void NonlinearBeamformer::InitInterfAngles() { 291 void NonlinearBeamformer::InitInterfAngles() {
Andrew MacDonald 2015/10/21 03:27:43 This looks good, but it's probably complicated eno
aluebs-webrtc 2015/10/27 18:08:15 Good point! :) And the great part about writing un
273 const float kAwayRadians = 292 const float kAwayRadians =
274 std::min(static_cast<float>(M_PI), 293 std::min(static_cast<float>(M_PI),
275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / 294 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) /
276 min_mic_spacing_)); 295 min_mic_spacing_));
277 296
278 interf_angles_radians_.clear(); 297 interf_angles_radians_.clear();
279 // TODO(aluebs): When the target angle is settable, make sure the interferer 298 if (IsGeometryLinear(array_geometry_)) {
280 // scenarios aren't reflected over the target one for linear geometries. 299 const Point array_direction =
281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); 300 PairDirection(array_geometry_[0], array_geometry_[1]);
282 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); 301 const Point array_normal(array_direction.y(), -array_direction.x(), 0.f);
302 const Point target_direction = AzimuthToPoint(target_angle_radians_);
303 const Point clockwise_interf_direction =
304 AzimuthToPoint(target_angle_radians_ - kAwayRadians);
305 if (DotProduct(array_normal, target_direction) *
Andrew MacDonald 2015/10/21 03:27:43 Add some comments to each of these conditions, bec
aluebs-webrtc 2015/10/27 18:08:15 Done.
306 DotProduct(array_normal, clockwise_interf_direction) >=
307 0.f) {
308 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians);
309 } else {
310 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians +
311 M_PI);
312 }
313 const Point counterclock_interf_direction =
314 AzimuthToPoint(target_angle_radians_ + kAwayRadians);
315 if (DotProduct(array_normal, target_direction) *
316 DotProduct(array_normal, counterclock_interf_direction) >=
317 0.f) {
318 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians);
319 } else {
320 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians -
321 M_PI);
322 }
323 } else {
324 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians);
325 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians);
326 }
283 } 327 }
284 328
285 void NonlinearBeamformer::InitDelaySumMasks() { 329 void NonlinearBeamformer::InitDelaySumMasks() {
286 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { 330 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
287 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); 331 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
288 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, 332 CovarianceMatrixGenerator::PhaseAlignmentMasks(
289 kFftSize, 333 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
290 sample_rate_hz_, 334 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
291 kSpeedOfSoundMeterSeconds,
292 array_geometry_,
293 kTargetAngleRadians,
294 &delay_sum_masks_[f_ix]);
295 335
296 complex_f norm_factor = sqrt( 336 complex_f norm_factor = sqrt(
297 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); 337 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
298 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); 338 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
299 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); 339 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
300 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( 340 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
301 normalized_delay_sum_masks_[f_ix])); 341 normalized_delay_sum_masks_[f_ix]));
302 } 342 }
303 } 343 }
304 344
305 void NonlinearBeamformer::InitTargetCovMats() { 345 void NonlinearBeamformer::InitTargetCovMats() {
306 for (size_t i = 0; i < kNumFreqBins; ++i) { 346 for (size_t i = 0; i < kNumFreqBins; ++i) {
307 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); 347 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
308 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); 348 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
309 } 349 }
310 } 350 }
311 351
352 void NonlinearBeamformer::InitDiffuseCovMats() {
353 for (size_t i = 0; i < kNumFreqBins; ++i) {
354 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
355 CovarianceMatrixGenerator::UniformCovarianceMatrix(
356 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
357 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
358 uniform_cov_mat_[i].Scale(1.f / normalization_factor);
359 uniform_cov_mat_[i].Scale(1 - kBalance);
360 }
361 }
362
312 void NonlinearBeamformer::InitInterfCovMats() { 363 void NonlinearBeamformer::InitInterfCovMats() {
313 for (size_t i = 0; i < kNumFreqBins; ++i) { 364 for (size_t i = 0; i < kNumFreqBins; ++i) {
314 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
315 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
316 array_geometry_,
317 &uniform_cov_mat);
318 complex_f normalization_factor = uniform_cov_mat.elements()[0][0];
319 uniform_cov_mat.Scale(1.f / normalization_factor);
320 uniform_cov_mat.Scale(1 - kBalance);
321 interf_cov_mats_[i].clear(); 365 interf_cov_mats_[i].clear();
322 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { 366 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
323 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, 367 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
324 num_input_channels_)); 368 num_input_channels_));
325 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); 369 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
326 CovarianceMatrixGenerator::AngledCovarianceMatrix( 370 CovarianceMatrixGenerator::AngledCovarianceMatrix(
327 kSpeedOfSoundMeterSeconds, 371 kSpeedOfSoundMeterSeconds,
328 interf_angles_radians_[j], 372 interf_angles_radians_[j],
329 i, 373 i,
330 kFftSize, 374 kFftSize,
331 kNumFreqBins, 375 kNumFreqBins,
332 sample_rate_hz_, 376 sample_rate_hz_,
333 array_geometry_, 377 array_geometry_,
334 &angled_cov_mat); 378 &angled_cov_mat);
335 // Normalize matrices before averaging them. 379 // Normalize matrices before averaging them.
336 normalization_factor = angled_cov_mat.elements()[0][0]; 380 complex_f normalization_factor = angled_cov_mat.elements()[0][0];
337 angled_cov_mat.Scale(1.f / normalization_factor); 381 angled_cov_mat.Scale(1.f / normalization_factor);
338 // Weighted average of matrices. 382 // Weighted average of matrices.
339 angled_cov_mat.Scale(kBalance); 383 angled_cov_mat.Scale(kBalance);
340 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); 384 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
341 } 385 }
342 } 386 }
343 } 387 }
388
389 void NonlinearBeamformer::NormalizeCovMats() {
390 for (size_t i = 0; i < kNumFreqBins; ++i) {
391 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
392 rpsiws_[i].clear();
393 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
394 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
395 }
396 }
397 }
344 398
345 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, 399 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
346 ChannelBuffer<float>* output) { 400 ChannelBuffer<float>* output) {
347 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); 401 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
348 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); 402 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
349 403
350 float old_high_pass_mask = high_pass_postfilter_mask_; 404 float old_high_pass_mask = high_pass_postfilter_mask_;
351 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); 405 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
352 // Ramp up/down for smoothing. 1 mask per 10ms results in audible 406 // Ramp up/down for smoothing. 1 mask per 10ms results in audible
353 // discontinuities. 407 // discontinuities.
354 const float ramp_increment = 408 const float ramp_increment =
355 (high_pass_postfilter_mask_ - old_high_pass_mask) / 409 (high_pass_postfilter_mask_ - old_high_pass_mask) /
356 input.num_frames_per_band(); 410 input.num_frames_per_band();
357 // Apply delay and sum and post-filter in the time domain. WARNING: only works 411 // Apply delay and sum and post-filter in the time domain. WARNING: only works
358 // because delay-and-sum is not frequency dependent. 412 // because delay-and-sum is not frequency dependent.
359 for (size_t i = 1; i < input.num_bands(); ++i) { 413 for (size_t i = 1; i < input.num_bands(); ++i) {
360 float smoothed_mask = old_high_pass_mask; 414 float smoothed_mask = old_high_pass_mask;
361 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { 415 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
362 smoothed_mask += ramp_increment; 416 smoothed_mask += ramp_increment;
363 417
364 // Applying the delay and sum (at zero degrees, this is equivalent to 418 // Applying the delay and sum (at zero degrees, this is equivalent to
365 // averaging). 419 // averaging).
Andrew MacDonald 2015/10/22 01:43:32 CRITICAL ;-) Just noticed this because I'm lookin
Andrew MacDonald 2015/10/22 01:48:55 I think the Matlab code does the delay-and-sum fil
Andrew MacDonald 2015/10/22 01:53:03 Ah, that doesn't make any sense. This is for the u
aluebs-webrtc 2015/10/27 18:08:15 As you said, this is only for the higher bands, so
Andrew MacDonald 2015/10/28 01:57:56 This looks good, assuming the effect is as negligi
aluebs-webrtc 2015/10/29 00:34:21 I did confirm that I couldn't hear the difference.
366 float sum = 0.f; 420 float sum = 0.f;
367 for (int k = 0; k < input.num_channels(); ++k) { 421 for (int k = 0; k < input.num_channels(); ++k) {
368 sum += input.channels(i)[k][j]; 422 sum += input.channels(i)[k][j];
369 } 423 }
370 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; 424 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
371 } 425 }
372 } 426 }
373 } 427 }
374 428
429 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
430 target_angle_radians_ = target_direction.azimuth();
431 InitHighFrequencyCorrectionRanges();
432 InitInterfAngles();
433 InitDelaySumMasks();
434 InitTargetCovMats();
435 InitInterfCovMats();
436 NormalizeCovMats();
437 }
438
375 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { 439 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
376 // If more than half-beamwidth degrees away from the beam's center, 440 // If more than half-beamwidth degrees away from the beam's center,
377 // you are out of the beam. 441 // you are out of the beam.
378 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < 442 return fabs(spherical_point.azimuth() - target_angle_radians_) <
379 kHalfBeamWidthRadians; 443 kHalfBeamWidthRadians;
380 } 444 }
381 445
382 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, 446 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
383 int num_input_channels, 447 int num_input_channels,
384 size_t num_freq_bins, 448 size_t num_freq_bins,
385 int num_output_channels, 449 int num_output_channels,
386 complex_f* const* output) { 450 complex_f* const* output) {
387 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); 451 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);
388 RTC_CHECK_EQ(num_input_channels, num_input_channels_); 452 RTC_CHECK_EQ(num_input_channels, num_input_channels_);
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after
533 new_mask_ + high_mean_end_bin_ + 1); 597 new_mask_ + high_mean_end_bin_ + 1);
534 if (new_mask_[quantile] > kMaskTargetThreshold) { 598 if (new_mask_[quantile] > kMaskTargetThreshold) {
535 is_target_present_ = true; 599 is_target_present_ = true;
536 interference_blocks_count_ = 0; 600 interference_blocks_count_ = 0;
537 } else { 601 } else {
538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; 602 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
539 } 603 }
540 } 604 }
541 605
542 } // namespace webrtc 606 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698