OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
54 | 54 |
55 // Alpha coefficients for mask smoothing. | 55 // Alpha coefficients for mask smoothing. |
56 const float kMaskTimeSmoothAlpha = 0.2f; | 56 const float kMaskTimeSmoothAlpha = 0.2f; |
57 const float kMaskFrequencySmoothAlpha = 0.6f; | 57 const float kMaskFrequencySmoothAlpha = 0.6f; |
58 | 58 |
59 // The average mask is computed from masks in this mid-frequency range. If these | 59 // The average mask is computed from masks in this mid-frequency range. If these |
60 // ranges are changed |kMaskQuantile| might need to be adjusted. | 60 // ranges are changed |kMaskQuantile| might need to be adjusted. |
61 const int kLowMeanStartHz = 200; | 61 const int kLowMeanStartHz = 200; |
62 const int kLowMeanEndHz = 400; | 62 const int kLowMeanEndHz = 400; |
63 | 63 |
64 // TODO(aluebs): Make the high frequency correction range depend on the target | |
65 // angle. | |
66 const int kHighMeanStartHz = 3000; | |
67 const int kHighMeanEndHz = 5000; | |
68 | |
69 // Range limiter for subtractive terms in the numerator and denominator of the | 64 // Range limiter for subtractive terms in the numerator and denominator of the |
70 // postfilter expression. It handles the scenario mismatch between the true and | 65 // postfilter expression. It handles the scenario mismatch between the true and |
71 // model sources (target and interference). | 66 // model sources (target and interference). |
72 const float kCutOffConstant = 0.9999; | 67 const float kCutOffConstant = 0.9999; |
73 | 68 |
74 // Quantile of mask values which is used to estimate target presence. | 69 // Quantile of mask values which is used to estimate target presence. |
75 const float kMaskQuantile = 0.7f; | 70 const float kMaskQuantile = 0.7f; |
76 // Mask threshold over which the data is considered signal and not interference. | 71 // Mask threshold over which the data is considered signal and not interference. |
77 // It has to be updated every time the postfilter calculation is changed | 72 // It has to be updated every time the postfilter calculation is changed |
78 // significantly. | 73 // significantly. |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
214 : num_input_channels_(array_geometry.size()), | 209 : num_input_channels_(array_geometry.size()), |
215 array_geometry_(GetCenteredArray(array_geometry)), | 210 array_geometry_(GetCenteredArray(array_geometry)), |
216 mic_spacing_(GetMinimumSpacing(array_geometry)) { | 211 mic_spacing_(GetMinimumSpacing(array_geometry)) { |
217 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 212 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
218 } | 213 } |
219 | 214 |
220 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 215 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
221 chunk_length_ = | 216 chunk_length_ = |
222 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 217 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
223 sample_rate_hz_ = sample_rate_hz; | 218 sample_rate_hz_ = sample_rate_hz; |
224 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | 219 InitFrequencyCorrectionRanges(); |
225 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
226 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | |
227 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | |
228 // These bin indexes determine the regions over which a mean is taken. This | |
229 // is applied as a constant value over the adjacent end "frequency correction" | |
230 // regions. | |
231 // | |
232 // low_mean_start_bin_ high_mean_start_bin_ | |
233 // v v constant | |
234 // |----------------|--------|----------------|-------|----------------| | |
235 // constant ^ ^ | |
236 // low_mean_end_bin_ high_mean_end_bin_ | |
237 // | |
238 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
239 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
240 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | |
241 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | |
242 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | |
243 | 220 |
244 high_pass_postfilter_mask_ = 1.f; | 221 high_pass_postfilter_mask_ = 1.f; |
245 is_target_present_ = false; | 222 is_target_present_ = false; |
246 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 223 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
247 interference_blocks_count_ = hold_target_blocks_; | 224 interference_blocks_count_ = hold_target_blocks_; |
248 | 225 |
249 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 226 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
250 1, | 227 1, |
251 chunk_length_, | 228 chunk_length_, |
252 window_, | 229 window_, |
(...skipping 15 matching lines...) Expand all Loading... | |
268 | 245 |
269 for (size_t i = 0; i < kNumFreqBins; ++i) { | 246 for (size_t i = 0; i < kNumFreqBins; ++i) { |
270 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 247 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
271 rpsiws_[i].clear(); | 248 rpsiws_[i].clear(); |
272 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 249 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
273 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | 250 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
274 } | 251 } |
275 } | 252 } |
276 } | 253 } |
277 | 254 |
255 void NonlinearBeamformer::InitFrequencyCorrectionRanges() { | |
256 const float kAliasingFreqHz = | |
257 kSpeedOfSoundMeterSeconds / | |
258 (mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); | |
259 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | |
260 sample_rate_hz_ / 2.f); | |
261 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | |
262 sample_rate_hz_ / 2.f); | |
Andrew MacDonald
2015/10/13 22:09:21
This results in a fairly similar range as before w
aluebs-webrtc
2015/10/17 01:01:22
Yes, I tested it on all supported Chromebooks.
Thi
| |
263 | |
264 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
265 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
266 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | |
267 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | |
268 // These bin indexes determine the regions over which a mean is taken. This | |
269 // is applied as a constant value over the adjacent end "frequency correction" | |
270 // regions. | |
271 // | |
272 // low_mean_start_bin_ high_mean_start_bin_ | |
273 // v v constant | |
274 // |----------------|--------|----------------|-------|----------------| | |
275 // constant ^ ^ | |
276 // low_mean_end_bin_ high_mean_end_bin_ | |
277 // | |
278 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
279 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
280 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | |
281 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | |
282 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | |
283 } | |
284 | |
285 | |
278 void NonlinearBeamformer::InitInterfAngles() { | 286 void NonlinearBeamformer::InitInterfAngles() { |
279 const float kAway = | 287 const float kAway = |
280 std::min(static_cast<float>(M_PI), | 288 std::min(static_cast<float>(M_PI), |
281 std::max(kMinAwayRadians, | 289 std::max(kMinAwayRadians, |
282 kAwaySlope * static_cast<float>(M_PI) / mic_spacing_)); | 290 kAwaySlope * static_cast<float>(M_PI) / mic_spacing_)); |
283 | 291 |
284 interf_angles_radians_.clear(); | 292 interf_angles_radians_.clear(); |
285 // TODO(aluebs): When the target angle is settable, make sure the interferer | 293 // TODO(aluebs): When the target angle is settable, make sure the interferer |
286 // scenarios aren't reflected over the target one for linear geometries. | 294 // scenarios aren't reflected over the target one for linear geometries. |
287 interf_angles_radians_.push_back(kTargetAngleRadians - kAway); | 295 interf_angles_radians_.push_back(kTargetAngleRadians - kAway); |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
539 new_mask_ + high_mean_end_bin_ + 1); | 547 new_mask_ + high_mean_end_bin_ + 1); |
540 if (new_mask_[quantile] > kMaskTargetThreshold) { | 548 if (new_mask_[quantile] > kMaskTargetThreshold) { |
541 is_target_present_ = true; | 549 is_target_present_ = true; |
542 interference_blocks_count_ = 0; | 550 interference_blocks_count_ = 0; |
543 } else { | 551 } else { |
544 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 552 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
545 } | 553 } |
546 } | 554 } |
547 | 555 |
548 } // namespace webrtc | 556 } // namespace webrtc |
OLD | NEW |