OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
54 | 54 |
55 // Alpha coefficients for mask smoothing. | 55 // Alpha coefficients for mask smoothing. |
56 const float kMaskTimeSmoothAlpha = 0.2f; | 56 const float kMaskTimeSmoothAlpha = 0.2f; |
57 const float kMaskFrequencySmoothAlpha = 0.6f; | 57 const float kMaskFrequencySmoothAlpha = 0.6f; |
58 | 58 |
59 // The average mask is computed from masks in this mid-frequency range. If these | 59 // The average mask is computed from masks in this mid-frequency range. If these |
60 // ranges are changed |kMaskQuantile| might need to be adjusted. | 60 // ranges are changed |kMaskQuantile| might need to be adjusted. |
61 const int kLowMeanStartHz = 200; | 61 const int kLowMeanStartHz = 200; |
62 const int kLowMeanEndHz = 400; | 62 const int kLowMeanEndHz = 400; |
63 | 63 |
64 // TODO(aluebs): Make the high frequency correction range depend on the target | |
65 // angle. | |
66 const int kHighMeanStartHz = 3000; | |
67 const int kHighMeanEndHz = 5000; | |
68 | |
69 // Range limiter for subtractive terms in the numerator and denominator of the | 64 // Range limiter for subtractive terms in the numerator and denominator of the |
70 // postfilter expression. It handles the scenario mismatch between the true and | 65 // postfilter expression. It handles the scenario mismatch between the true and |
71 // model sources (target and interference). | 66 // model sources (target and interference). |
72 const float kCutOffConstant = 0.9999f; | 67 const float kCutOffConstant = 0.9999f; |
73 | 68 |
74 // Quantile of mask values which is used to estimate target presence. | 69 // Quantile of mask values which is used to estimate target presence. |
75 const float kMaskQuantile = 0.7f; | 70 const float kMaskQuantile = 0.7f; |
76 // Mask threshold over which the data is considered signal and not interference. | 71 // Mask threshold over which the data is considered signal and not interference. |
77 // It has to be updated every time the postfilter calculation is changed | 72 // It has to be updated every time the postfilter calculation is changed |
78 // significantly. | 73 // significantly. |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
200 : num_input_channels_(array_geometry.size()), | 195 : num_input_channels_(array_geometry.size()), |
201 array_geometry_(GetCenteredArray(array_geometry)), | 196 array_geometry_(GetCenteredArray(array_geometry)), |
202 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { | 197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { |
203 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
204 } | 199 } |
205 | 200 |
206 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
207 chunk_length_ = | 202 chunk_length_ = |
208 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
209 sample_rate_hz_ = sample_rate_hz; | 204 sample_rate_hz_ = sample_rate_hz; |
210 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | 205 InitFrequencyCorrectionRanges(); |
211 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
212 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | |
213 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | |
214 // These bin indexes determine the regions over which a mean is taken. This | |
215 // is applied as a constant value over the adjacent end "frequency correction" | |
216 // regions. | |
217 // | |
218 // low_mean_start_bin_ high_mean_start_bin_ | |
219 // v v constant | |
220 // |----------------|--------|----------------|-------|----------------| | |
221 // constant ^ ^ | |
222 // low_mean_end_bin_ high_mean_end_bin_ | |
223 // | |
224 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
225 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
226 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | |
227 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | |
228 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | |
229 | 206 |
230 high_pass_postfilter_mask_ = 1.f; | 207 high_pass_postfilter_mask_ = 1.f; |
231 is_target_present_ = false; | 208 is_target_present_ = false; |
232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
233 interference_blocks_count_ = hold_target_blocks_; | 210 interference_blocks_count_ = hold_target_blocks_; |
234 | 211 |
235 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 212 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
236 1, | 213 1, |
237 chunk_length_, | 214 chunk_length_, |
238 window_, | 215 window_, |
(...skipping 15 matching lines...) Expand all Loading... |
254 | 231 |
255 for (size_t i = 0; i < kNumFreqBins; ++i) { | 232 for (size_t i = 0; i < kNumFreqBins; ++i) { |
256 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
257 rpsiws_[i].clear(); | 234 rpsiws_[i].clear(); |
258 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
259 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | 236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
260 } | 237 } |
261 } | 238 } |
262 } | 239 } |
263 | 240 |
| 241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() {
| 242 const float kAliasingFreqHz =
| 243 kSpeedOfSoundMeterSeconds /
| 244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians))));
| 245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
| 246 sample_rate_hz_ / 2.f);
| 247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
| 248 sample_rate_hz_ / 2.f);
| 249
| 250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
| 251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
| 252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
| 253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
| 254 // The bin indexes set above bound the two regions over which a mean is
| 255 // taken. Each mean is then applied as a constant value over the adjacent
| 256 // end "frequency correction" region, as sketched below.
| 257 //
| 258 // low_mean_start_bin_ high_mean_start_bin_
| 259 // v v constant
| 260 // |----------------|--------|----------------|-------|----------------|
| 261 // constant ^ ^
| 262 // low_mean_end_bin_ high_mean_end_bin_
| 263 // NOTE(review): start/end coincide if both high limits hit Nyquist; confirm.
| 264 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
| 265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
| 266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
| 267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
| 268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
| 269 }
| 270 |
| 271 |
264 void NonlinearBeamformer::InitInterfAngles() { | 272 void NonlinearBeamformer::InitInterfAngles() { |
265 const float kAwayRadians = | 273 const float kAwayRadians = |
266 std::min(static_cast<float>(M_PI), | 274 std::min(static_cast<float>(M_PI), |
267 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / | 275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / |
268 min_mic_spacing_)); | 276 min_mic_spacing_)); |
269 | 277 |
270 interf_angles_radians_.clear(); | 278 interf_angles_radians_.clear(); |
271 // TODO(aluebs): When the target angle is settable, make sure the interferer | 279 // TODO(aluebs): When the target angle is settable, make sure the interferer |
272 // scenarios aren't reflected over the target one for linear geometries. | 280 // scenarios aren't reflected over the target one for linear geometries. |
273 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); | 281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
525 new_mask_ + high_mean_end_bin_ + 1); | 533 new_mask_ + high_mean_end_bin_ + 1); |
526 if (new_mask_[quantile] > kMaskTargetThreshold) { | 534 if (new_mask_[quantile] > kMaskTargetThreshold) { |
527 is_target_present_ = true; | 535 is_target_present_ = true; |
528 interference_blocks_count_ = 0; | 536 interference_blocks_count_ = 0; |
529 } else { | 537 } else { |
530 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
531 } | 539 } |
532 } | 540 } |
533 | 541 |
534 } // namespace webrtc | 542 } // namespace webrtc |
OLD | NEW |