Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1395453004: Make the high frequency correction range depend on the target angle (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@away
Patch Set: Rebasing Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 54
55 // Alpha coefficients for mask smoothing. 55 // Alpha coefficients for mask smoothing.
56 const float kMaskTimeSmoothAlpha = 0.2f; 56 const float kMaskTimeSmoothAlpha = 0.2f;
57 const float kMaskFrequencySmoothAlpha = 0.6f; 57 const float kMaskFrequencySmoothAlpha = 0.6f;
58 58
59 // The average mask is computed from masks in this mid-frequency range. If these 59 // The average mask is computed from masks in this mid-frequency range. If these
60 // ranges are changed, |kMaskQuantile| might need to be adjusted. 60 // ranges are changed, |kMaskQuantile| might need to be adjusted.
61 const int kLowMeanStartHz = 200; 61 const int kLowMeanStartHz = 200;
62 const int kLowMeanEndHz = 400; 62 const int kLowMeanEndHz = 400;
63 63
64 // TODO(aluebs): Make the high frequency correction range depend on the target
65 // angle.
66 const int kHighMeanStartHz = 3000;
67 const int kHighMeanEndHz = 5000;
68
69 // Range limiter for subtractive terms in the numerator and denominator of the 64 // Range limiter for subtractive terms in the numerator and denominator of the
70 // postfilter expression. It handles the scenario mismatch between the true and 65 // postfilter expression. It handles the scenario mismatch between the true and
71 // model sources (target and interference). 66 // model sources (target and interference).
72 const float kCutOffConstant = 0.9999f; 67 const float kCutOffConstant = 0.9999f;
73 68
74 // Quantile of mask values which is used to estimate target presence. 69 // Quantile of mask values which is used to estimate target presence.
75 const float kMaskQuantile = 0.7f; 70 const float kMaskQuantile = 0.7f;
76 // Mask threshold over which the data is considered signal and not interference. 71 // Mask threshold over which the data is considered signal and not interference.
77 // It has to be updated every time the postfilter calculation is changed 72 // It has to be updated every time the postfilter calculation is changed
78 // significantly. 73 // significantly.
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
200 : num_input_channels_(array_geometry.size()), 195 : num_input_channels_(array_geometry.size()),
201 array_geometry_(GetCenteredArray(array_geometry)), 196 array_geometry_(GetCenteredArray(array_geometry)),
202 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { 197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) {
203 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
204 } 199 }
205 200
206 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
207 chunk_length_ = 202 chunk_length_ =
208 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
209 sample_rate_hz_ = sample_rate_hz; 204 sample_rate_hz_ = sample_rate_hz;
210 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); 205 InitFrequencyCorrectionRanges();
211 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
212 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
213 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
214 // These bin indexes determine the regions over which a mean is taken. This
215 // is applied as a constant value over the adjacent end "frequency correction"
216 // regions.
217 //
218 //         low_mean_start_bin_      high_mean_start_bin_
219 //                  v                         v            constant
220 // |----------------|--------|----------------|-------|----------------|
221 //      constant             ^                        ^
222 //                   low_mean_end_bin_       high_mean_end_bin_
223 //
224 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
225 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
226 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
227 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
228 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
229 206
230 high_pass_postfilter_mask_ = 1.f; 207 high_pass_postfilter_mask_ = 1.f;
231 is_target_present_ = false; 208 is_target_present_ = false;
232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; 209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
233 interference_blocks_count_ = hold_target_blocks_; 210 interference_blocks_count_ = hold_target_blocks_;
234 211
235 lapped_transform_.reset(new LappedTransform(num_input_channels_, 212 lapped_transform_.reset(new LappedTransform(num_input_channels_,
236 1, 213 1,
237 chunk_length_, 214 chunk_length_,
238 window_, 215 window_,
(...skipping 15 matching lines...) Expand all
254 231
255 for (size_t i = 0; i < kNumFreqBins; ++i) { 232 for (size_t i = 0; i < kNumFreqBins; ++i) {
256 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); 233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
257 rpsiws_[i].clear(); 234 rpsiws_[i].clear();
258 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { 235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
259 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); 236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
260 } 237 }
261 } 238 }
262 } 239 }
263 240
241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() {
242 const float kAliasingFreqHz =
243 kSpeedOfSoundMeterSeconds /
244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians))));
245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
246 sample_rate_hz_ / 2.f);
247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
248 sample_rate_hz_ / 2.f);
249
250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
254 // These bin indexes determine the regions over which a mean is taken. This
255 // is applied as a constant value over the adjacent end "frequency correction"
256 // regions.
257 //
258 //         low_mean_start_bin_      high_mean_start_bin_
259 //                  v                         v            constant
260 // |----------------|--------|----------------|-------|----------------|
261 //      constant             ^                        ^
262 //                   low_mean_end_bin_       high_mean_end_bin_
263 //
264 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
269 }
270
271
264 void NonlinearBeamformer::InitInterfAngles() { 272 void NonlinearBeamformer::InitInterfAngles() {
265 const float kAwayRadians = 273 const float kAwayRadians =
266 std::min(static_cast<float>(M_PI), 274 std::min(static_cast<float>(M_PI),
267 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / 275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) /
268 min_mic_spacing_)); 276 min_mic_spacing_));
269 277
270 interf_angles_radians_.clear(); 278 interf_angles_radians_.clear();
271 // TODO(aluebs): When the target angle is settable, make sure the interferer 279 // TODO(aluebs): When the target angle is settable, make sure the interferer
272 // scenarios aren't reflected over the target one for linear geometries. 280 // scenarios aren't reflected over the target one for linear geometries.
273 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); 281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians);
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after
525 new_mask_ + high_mean_end_bin_ + 1); 533 new_mask_ + high_mean_end_bin_ + 1);
526 if (new_mask_[quantile] > kMaskTargetThreshold) { 534 if (new_mask_[quantile] > kMaskTargetThreshold) {
527 is_target_present_ = true; 535 is_target_present_ = true;
528 interference_blocks_count_ = 0; 536 interference_blocks_count_ = 0;
529 } else { 537 } else {
530 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; 538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
531 } 539 }
532 } 540 }
533 541
534 } // namespace webrtc 542 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698