OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... | |
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" | 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" |
23 | 23 |
24 namespace webrtc { | 24 namespace webrtc { |
25 namespace { | 25 namespace { |
26 | 26 |
27 // Alpha for the Kaiser Bessel Derived window. | 27 // Alpha for the Kaiser Bessel Derived window. |
28 const float kKbdAlpha = 1.5f; | 28 const float kKbdAlpha = 1.5f; |
29 | 29 |
30 const float kSpeedOfSoundMeterSeconds = 343; | 30 const float kSpeedOfSoundMeterSeconds = 343; |
31 | 31 |
32 // For both target and interference angles, PI / 2 is perpendicular to the | |
33 // microphone array, facing forwards. The positive direction goes | |
34 // counterclockwise. | |
35 // The angle at which we amplify sound. | |
36 // TODO(aluebs): Make the target angle dynamically settable. | |
37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | |
38 | |
39 // The minimum separation in radians between the target direction and an | 32 // The minimum separation in radians between the target direction and an |
40 // interferer scenario. | 33 // interferer scenario. |
41 const float kMinAwayRadians = 0.2f; | 34 const float kMinAwayRadians = 0.2f; |
42 | 35 |
43 // The separation between the target direction and the closest interferer | 36 // The separation between the target direction and the closest interferer |
44 // scenario is proportional to this constant. | 37 // scenario is proportional to this constant. |
45 const float kAwaySlope = 0.008f; | 38 const float kAwaySlope = 0.008f; |
46 | 39 |
47 // When calculating the interference covariance matrix, this is the weight for | 40 // When calculating the interference covariance matrix, this is the weight for |
48 // the weighted average between the uniform covariance matrix and the angled | 41 // the weighted average between the uniform covariance matrix and the angled |
49 // covariance matrix. | 42 // covariance matrix. |
50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) |
51 const float kBalance = 0.95f; | 44 const float kBalance = 0.95f; |
52 | 45 |
53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | 46 const float kHalfBeamWidthRadians = DegreesToRadians(20.f); |
54 | 47 |
55 // Alpha coefficients for mask smoothing. | 48 // Alpha coefficients for mask smoothing. |
56 const float kMaskTimeSmoothAlpha = 0.2f; | 49 const float kMaskTimeSmoothAlpha = 0.2f; |
57 const float kMaskFrequencySmoothAlpha = 0.6f; | 50 const float kMaskFrequencySmoothAlpha = 0.6f; |
58 | 51 |
59 // The average mask is computed from masks in this mid-frequency range. If these | 52 // The average mask is computed from masks in this mid-frequency range. If these |
60 // ranges are changed |kMaskQuantile| might need to be adjusted. | 53 // ranges are changed |kMaskQuantile| might need to be adjusted. |
61 const int kLowMeanStartHz = 200; | 54 const int kLowMeanStartHz = 200; |
62 const int kLowMeanEndHz = 400; | 55 const int kLowMeanEndHz = 400; |
63 | 56 |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
178 center += array_geometry[i].c[dim]; | 171 center += array_geometry[i].c[dim]; |
179 } | 172 } |
180 center /= array_geometry.size(); | 173 center /= array_geometry.size(); |
181 for (size_t i = 0; i < array_geometry.size(); ++i) { | 174 for (size_t i = 0; i < array_geometry.size(); ++i) { |
182 array_geometry[i].c[dim] -= center; | 175 array_geometry[i].c[dim] -= center; |
183 } | 176 } |
184 } | 177 } |
185 return array_geometry; | 178 return array_geometry; |
186 } | 179 } |
187 | 180 |
181 Point PairDirection(const Point& a, const Point& b) { | |
182 return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()}; | |
183 } | |
184 | |
185 float DotProduct(const Point& a, const Point& b) { | |
186 return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); | |
187 } | |
188 | |
189 bool IsGeometryLinear(std::vector<Point> array_geometry) { | |
Andrew MacDonald
2015/10/21 03:27:43
DCHECK_GT(array_geometry.size(), 1);
aluebs-webrtc
2015/10/27 18:08:15
Done.
| |
190 const float kMinDotProduct = 0.9999f; | |
191 const float kMinNorm = 1e-6f; | |
192 const Point first_pair_direction = | |
193 PairDirection(array_geometry[0], array_geometry[1]); | |
194 float norm_a = std::max( | |
195 kMinNorm, DotProduct(first_pair_direction, first_pair_direction)); | |
196 for (size_t i = 2u; i < array_geometry.size(); ++i) { | |
197 const Point pair_direction = | |
198 PairDirection(array_geometry[i - 1], array_geometry[i]); | |
199 float norm_b = | |
Andrew MacDonald
2015/10/21 03:27:43
const
aluebs-webrtc
2015/10/27 18:08:15
Done.
| |
200 std::max(kMinNorm, DotProduct(pair_direction, pair_direction)); | |
201 if (std::abs(DotProduct(first_pair_direction, pair_direction) / | |
202 std::sqrt(norm_a * norm_b)) > kMinDotProduct) { | |
203 return false; | |
204 } | |
205 } | |
206 return true; | |
207 } | |
208 | |
209 Point AzimuthToPoint(float azimuth) { | |
210 return Point(std::cos(azimuth), std::sin(azimuth), 0.f); | |
211 } | |
212 | |
188 } // namespace | 213 } // namespace |
189 | 214 |
190 // static | 215 // static |
191 const size_t NonlinearBeamformer::kNumFreqBins; | 216 const size_t NonlinearBeamformer::kNumFreqBins; |
192 | 217 |
193 NonlinearBeamformer::NonlinearBeamformer( | 218 NonlinearBeamformer::NonlinearBeamformer( |
194 const std::vector<Point>& array_geometry) | 219 const std::vector<Point>& array_geometry, |
220 SphericalPointf target_direction) | |
195 : num_input_channels_(array_geometry.size()), | 221 : num_input_channels_(array_geometry.size()), |
196 array_geometry_(GetCenteredArray(array_geometry)), | 222 array_geometry_(GetCenteredArray(array_geometry)), |
197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { | 223 min_mic_spacing_(GetMinimumSpacing(array_geometry)), |
224 target_angle_radians_(target_direction.azimuth()) { | |
198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 225 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
199 } | 226 } |
200 | 227 |
201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 228 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
202 chunk_length_ = | 229 chunk_length_ = |
203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 230 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
204 sample_rate_hz_ = sample_rate_hz; | 231 sample_rate_hz_ = sample_rate_hz; |
205 InitFrequencyCorrectionRanges(); | |
206 | 232 |
207 high_pass_postfilter_mask_ = 1.f; | 233 high_pass_postfilter_mask_ = 1.f; |
208 is_target_present_ = false; | 234 is_target_present_ = false; |
209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 235 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
210 interference_blocks_count_ = hold_target_blocks_; | 236 interference_blocks_count_ = hold_target_blocks_; |
211 | 237 |
212 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 238 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
213 1, | 239 1, |
214 chunk_length_, | 240 chunk_length_, |
215 window_, | 241 window_, |
216 kFftSize, | 242 kFftSize, |
217 kFftSize / 2, | 243 kFftSize / 2, |
218 this)); | 244 this)); |
219 for (size_t i = 0; i < kNumFreqBins; ++i) { | 245 for (size_t i = 0; i < kNumFreqBins; ++i) { |
220 time_smooth_mask_[i] = 1.f; | 246 time_smooth_mask_[i] = 1.f; |
221 final_mask_[i] = 1.f; | 247 final_mask_[i] = 1.f; |
222 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
223 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
224 } | 250 } |
225 | 251 |
226 // Initialize all nonadaptive values before looping through the frames. | 252 InitLowFrequencyCorrectionRanges(); |
227 InitInterfAngles(); | 253 InitDiffuseCovMats(); |
228 InitDelaySumMasks(); | 254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); |
229 InitTargetCovMats(); | |
230 InitInterfCovMats(); | |
231 | |
232 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | |
234 rpsiws_[i].clear(); | |
235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | |
236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | |
237 } | |
238 } | |
239 } | 255 } |
240 | 256 |
241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() { | 257 // These bin indexes determine the regions over which a mean is taken. This is |
258 // applied as a constant value over the adjacent end "frequency correction" | |
259 // regions. | |
260 // | |
261 // low_mean_start_bin_ high_mean_start_bin_ | |
262 // v v constant | |
263 // |----------------|--------|----------------|-------|----------------| | |
264 // constant ^ ^ | |
265 // low_mean_end_bin_ high_mean_end_bin_ | |
266 // | |
267 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { | |
268 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
269 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
270 | |
271 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
272 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
273 } | |
274 | |
275 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { | |
242 const float kAliasingFreqHz = | 276 const float kAliasingFreqHz = |
243 kSpeedOfSoundMeterSeconds / | 277 kSpeedOfSoundMeterSeconds / |
244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); | 278 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); |
245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | 279 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, |
246 sample_rate_hz_ / 2.f); | 280 sample_rate_hz_ / 2.f); |
247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | 281 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, |
248 sample_rate_hz_ / 2.f); | 282 sample_rate_hz_ / 2.f); |
249 | |
250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 283 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); |
253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 284 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); |
254 // These bin indexes determine the regions over which a mean is taken. This | 285 |
255 // is applied as a constant value over the adjacent end "frequency correction" | |
256 // regions. | |
257 // | |
258 // low_mean_start_bin_ high_mean_start_bin_ | |
259 // v v constant | |
260 // |----------------|--------|----------------|-------|----------------| | |
261 // constant ^ ^ | |
262 // low_mean_end_bin_ high_mean_end_bin_ | |
263 // | |
264 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 286 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 287 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 288 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
269 } | 289 } |
270 | 290 |
271 | |
272 void NonlinearBeamformer::InitInterfAngles() { | 291 void NonlinearBeamformer::InitInterfAngles() { |
Andrew MacDonald
2015/10/21 03:27:43
This looks good, but it's probably complicated eno
aluebs-webrtc
2015/10/27 18:08:15
Good point! :)
And the great part about writing un
| |
273 const float kAwayRadians = | 292 const float kAwayRadians = |
274 std::min(static_cast<float>(M_PI), | 293 std::min(static_cast<float>(M_PI), |
275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / | 294 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / |
276 min_mic_spacing_)); | 295 min_mic_spacing_)); |
277 | 296 |
278 interf_angles_radians_.clear(); | 297 interf_angles_radians_.clear(); |
279 // TODO(aluebs): When the target angle is settable, make sure the interferer | 298 if (IsGeometryLinear(array_geometry_)) { |
280 // scenarios aren't reflected over the target one for linear geometries. | 299 const Point array_direction = |
281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); | 300 PairDirection(array_geometry_[0], array_geometry_[1]); |
282 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); | 301 const Point array_normal(array_direction.y(), -array_direction.x(), 0.f); |
302 const Point target_direction = AzimuthToPoint(target_angle_radians_); | |
303 const Point clockwise_interf_direction = | |
304 AzimuthToPoint(target_angle_radians_ - kAwayRadians); | |
305 if (DotProduct(array_normal, target_direction) * | |
Andrew MacDonald
2015/10/21 03:27:43
Add some comments to each of these conditions, bec
aluebs-webrtc
2015/10/27 18:08:15
Done.
| |
306 DotProduct(array_normal, clockwise_interf_direction) >= | |
307 0.f) { | |
308 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | |
309 } else { | |
310 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians + | |
311 M_PI); | |
312 } | |
313 const Point counterclock_interf_direction = | |
314 AzimuthToPoint(target_angle_radians_ + kAwayRadians); | |
315 if (DotProduct(array_normal, target_direction) * | |
316 DotProduct(array_normal, counterclock_interf_direction) >= | |
317 0.f) { | |
318 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | |
319 } else { | |
320 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians - | |
321 M_PI); | |
322 } | |
323 } else { | |
324 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | |
325 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | |
326 } | |
283 } | 327 } |
284 | 328 |
285 void NonlinearBeamformer::InitDelaySumMasks() { | 329 void NonlinearBeamformer::InitDelaySumMasks() { |
286 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 330 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
287 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 331 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); |
288 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 332 CovarianceMatrixGenerator::PhaseAlignmentMasks( |
289 kFftSize, | 333 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, |
290 sample_rate_hz_, | 334 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); |
291 kSpeedOfSoundMeterSeconds, | |
292 array_geometry_, | |
293 kTargetAngleRadians, | |
294 &delay_sum_masks_[f_ix]); | |
295 | 335 |
296 complex_f norm_factor = sqrt( | 336 complex_f norm_factor = sqrt( |
297 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 337 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
298 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 338 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
299 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 339 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
300 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 340 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
301 normalized_delay_sum_masks_[f_ix])); | 341 normalized_delay_sum_masks_[f_ix])); |
302 } | 342 } |
303 } | 343 } |
304 | 344 |
305 void NonlinearBeamformer::InitTargetCovMats() { | 345 void NonlinearBeamformer::InitTargetCovMats() { |
306 for (size_t i = 0; i < kNumFreqBins; ++i) { | 346 for (size_t i = 0; i < kNumFreqBins; ++i) { |
307 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 347 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
308 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 348 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); |
309 } | 349 } |
310 } | 350 } |
311 | 351 |
352 void NonlinearBeamformer::InitDiffuseCovMats() { | |
353 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
354 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); | |
355 CovarianceMatrixGenerator::UniformCovarianceMatrix( | |
356 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); | |
357 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; | |
358 uniform_cov_mat_[i].Scale(1.f / normalization_factor); | |
359 uniform_cov_mat_[i].Scale(1 - kBalance); | |
360 } | |
361 } | |
362 | |
312 void NonlinearBeamformer::InitInterfCovMats() { | 363 void NonlinearBeamformer::InitInterfCovMats() { |
313 for (size_t i = 0; i < kNumFreqBins; ++i) { | 364 for (size_t i = 0; i < kNumFreqBins; ++i) { |
314 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | |
315 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | |
316 array_geometry_, | |
317 &uniform_cov_mat); | |
318 complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; | |
319 uniform_cov_mat.Scale(1.f / normalization_factor); | |
320 uniform_cov_mat.Scale(1 - kBalance); | |
321 interf_cov_mats_[i].clear(); | 365 interf_cov_mats_[i].clear(); |
322 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 366 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
323 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, | 367 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, |
324 num_input_channels_)); | 368 num_input_channels_)); |
325 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | 369 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); |
326 CovarianceMatrixGenerator::AngledCovarianceMatrix( | 370 CovarianceMatrixGenerator::AngledCovarianceMatrix( |
327 kSpeedOfSoundMeterSeconds, | 371 kSpeedOfSoundMeterSeconds, |
328 interf_angles_radians_[j], | 372 interf_angles_radians_[j], |
329 i, | 373 i, |
330 kFftSize, | 374 kFftSize, |
331 kNumFreqBins, | 375 kNumFreqBins, |
332 sample_rate_hz_, | 376 sample_rate_hz_, |
333 array_geometry_, | 377 array_geometry_, |
334 &angled_cov_mat); | 378 &angled_cov_mat); |
335 // Normalize matrices before averaging them. | 379 // Normalize matrices before averaging them. |
336 normalization_factor = angled_cov_mat.elements()[0][0]; | 380 complex_f normalization_factor = angled_cov_mat.elements()[0][0]; |
337 angled_cov_mat.Scale(1.f / normalization_factor); | 381 angled_cov_mat.Scale(1.f / normalization_factor); |
338 // Weighted average of matrices. | 382 // Weighted average of matrices. |
339 angled_cov_mat.Scale(kBalance); | 383 angled_cov_mat.Scale(kBalance); |
340 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); | 384 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); |
341 } | 385 } |
342 } | 386 } |
343 } | 387 } |
388 | |
389 void NonlinearBeamformer::NormalizeCovMats() { | |
390 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
391 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | |
392 rpsiws_[i].clear(); | |
393 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | |
394 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | |
395 } | |
396 } | |
397 } | |
344 | 398 |
345 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 399 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
346 ChannelBuffer<float>* output) { | 400 ChannelBuffer<float>* output) { |
347 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 401 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
348 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 402 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
349 | 403 |
350 float old_high_pass_mask = high_pass_postfilter_mask_; | 404 float old_high_pass_mask = high_pass_postfilter_mask_; |
351 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 405 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
352 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 406 // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
353 // discontinuities. | 407 // discontinuities. |
354 const float ramp_increment = | 408 const float ramp_increment = |
355 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 409 (high_pass_postfilter_mask_ - old_high_pass_mask) / |
356 input.num_frames_per_band(); | 410 input.num_frames_per_band(); |
357 // Apply delay and sum and post-filter in the time domain. WARNING: only works | 411 // Apply delay and sum and post-filter in the time domain. WARNING: only works |
358 // because delay-and-sum is not frequency dependent. | 412 // because delay-and-sum is not frequency dependent. |
359 for (size_t i = 1; i < input.num_bands(); ++i) { | 413 for (size_t i = 1; i < input.num_bands(); ++i) { |
360 float smoothed_mask = old_high_pass_mask; | 414 float smoothed_mask = old_high_pass_mask; |
361 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { | 415 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
362 smoothed_mask += ramp_increment; | 416 smoothed_mask += ramp_increment; |
363 | 417 |
364 // Applying the delay and sum (at zero degrees, this is equivalent to | 418 // Applying the delay and sum (at zero degrees, this is equivalent to |
365 // averaging). | 419 // averaging). |
Andrew MacDonald
2015/10/22 01:43:32
CRITICAL ;-)
Just noticed this because I'm lookin
Andrew MacDonald
2015/10/22 01:48:55
I think the Matlab code does the delay-and-sum fil
Andrew MacDonald
2015/10/22 01:53:03
Ah, that doesn't make any sense. This is for the u
aluebs-webrtc
2015/10/27 18:08:15
As you said, this is only for the higher bands, so
Andrew MacDonald
2015/10/28 01:57:56
This looks good, assuming the effect is as negligi
aluebs-webrtc
2015/10/29 00:34:21
I did confirm that I couldn't hear the difference.
| |
366 float sum = 0.f; | 420 float sum = 0.f; |
367 for (int k = 0; k < input.num_channels(); ++k) { | 421 for (int k = 0; k < input.num_channels(); ++k) { |
368 sum += input.channels(i)[k][j]; | 422 sum += input.channels(i)[k][j]; |
369 } | 423 } |
370 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; | 424 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; |
371 } | 425 } |
372 } | 426 } |
373 } | 427 } |
374 | 428 |
429 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { | |
430 target_angle_radians_ = target_direction.azimuth(); | |
431 InitHighFrequencyCorrectionRanges(); | |
432 InitInterfAngles(); | |
433 InitDelaySumMasks(); | |
434 InitTargetCovMats(); | |
435 InitInterfCovMats(); | |
436 NormalizeCovMats(); | |
437 } | |
438 | |
375 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 439 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { |
376 // If more than half-beamwidth degrees away from the beam's center, | 440 // If more than half-beamwidth degrees away from the beam's center, |
377 // you are out of the beam. | 441 // you are out of the beam. |
378 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < | 442 return fabs(spherical_point.azimuth() - target_angle_radians_) < |
379 kHalfBeamWidthRadians; | 443 kHalfBeamWidthRadians; |
380 } | 444 } |
381 | 445 |
382 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, | 446 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
383 int num_input_channels, | 447 int num_input_channels, |
384 size_t num_freq_bins, | 448 size_t num_freq_bins, |
385 int num_output_channels, | 449 int num_output_channels, |
386 complex_f* const* output) { | 450 complex_f* const* output) { |
387 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); | 451 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); |
388 RTC_CHECK_EQ(num_input_channels, num_input_channels_); | 452 RTC_CHECK_EQ(num_input_channels, num_input_channels_); |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
533 new_mask_ + high_mean_end_bin_ + 1); | 597 new_mask_ + high_mean_end_bin_ + 1); |
534 if (new_mask_[quantile] > kMaskTargetThreshold) { | 598 if (new_mask_[quantile] > kMaskTargetThreshold) { |
535 is_target_present_ = true; | 599 is_target_present_ = true; |
536 interference_blocks_count_ = 0; | 600 interference_blocks_count_ = 0; |
537 } else { | 601 } else { |
538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 602 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
539 } | 603 } |
540 } | 604 } |
541 | 605 |
542 } // namespace webrtc | 606 } // namespace webrtc |
OLD | NEW |