Chromium Code Reviews| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 
| 3 * | 3 * | 
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license | 
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source | 
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found | 
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may | 
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. | 
| 9 */ | 9 */ | 
| 10 | 10 | 
| (...skipping 11 matching lines...) Expand all Loading... | |
| 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" | 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator .h" | 
| 23 | 23 | 
| 24 namespace webrtc { | 24 namespace webrtc { | 
| 25 namespace { | 25 namespace { | 
| 26 | 26 | 
| 27 // Alpha for the Kaiser Bessel Derived window. | 27 // Alpha for the Kaiser Bessel Derived window. | 
| 28 const float kKbdAlpha = 1.5f; | 28 const float kKbdAlpha = 1.5f; | 
| 29 | 29 | 
| 30 const float kSpeedOfSoundMeterSeconds = 343; | 30 const float kSpeedOfSoundMeterSeconds = 343; | 
| 31 | 31 | 
| 32 // For both target and interference angles, PI / 2 is perpendicular to the | |
| 33 // microphone array, facing forwards. The positive direction goes | |
| 34 // counterclockwise. | |
| 35 // The angle at which we amplify sound. | |
| 36 // TODO(aluebs): Make the target angle dynamically settable. | |
| 37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | |
| 38 | |
| 39 // The minimum separation in radians between the target direction and an | 32 // The minimum separation in radians between the target direction and an | 
| 40 // interferer scenario. | 33 // interferer scenario. | 
| 41 const float kMinAwayRadians = 0.2f; | 34 const float kMinAwayRadians = 0.2f; | 
| 42 | 35 | 
| 43 // The separation between the target direction and the closest interferer | 36 // The separation between the target direction and the closest interferer | 
| 44 // scenario is proportional to this constant. | 37 // scenario is proportional to this constant. | 
| 45 const float kAwaySlope = 0.008f; | 38 const float kAwaySlope = 0.008f; | 
| 46 | 39 | 
| 47 // When calculating the interference covariance matrix, this is the weight for | 40 // When calculating the interference covariance matrix, this is the weight for | 
| 48 // the weighted average between the uniform covariance matrix and the angled | 41 // the weighted average between the uniform covariance matrix and the angled | 
| 49 // covariance matrix. | 42 // covariance matrix. | 
| 50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 
| 51 const float kBalance = 0.95f; | 44 const float kBalance = 0.95f; | 
| 52 | 45 | 
| 53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | 46 const float kHalfBeamWidthRadians = DegreesToRadians(20.f); | 
| 54 | 47 | 
| 55 // Alpha coefficients for mask smoothing. | 48 // Alpha coefficients for mask smoothing. | 
| 56 const float kMaskTimeSmoothAlpha = 0.2f; | 49 const float kMaskTimeSmoothAlpha = 0.2f; | 
| 57 const float kMaskFrequencySmoothAlpha = 0.6f; | 50 const float kMaskFrequencySmoothAlpha = 0.6f; | 
| 58 | 51 | 
| 59 // The average mask is computed from masks in this mid-frequency range. If these | 52 // The average mask is computed from masks in this mid-frequency range. If these | 
| 60 // ranges are changed |kMaskQuantile| might need to be adjusted. | 53 // ranges are changed |kMaskQuantile| might need to be adjusted. | 
| 61 const int kLowMeanStartHz = 200; | 54 const int kLowMeanStartHz = 200; | 
| 62 const int kLowMeanEndHz = 400; | 55 const int kLowMeanEndHz = 400; | 
| 63 | 56 | 
| (...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 178 center += array_geometry[i].c[dim]; | 171 center += array_geometry[i].c[dim]; | 
| 179 } | 172 } | 
| 180 center /= array_geometry.size(); | 173 center /= array_geometry.size(); | 
| 181 for (size_t i = 0; i < array_geometry.size(); ++i) { | 174 for (size_t i = 0; i < array_geometry.size(); ++i) { | 
| 182 array_geometry[i].c[dim] -= center; | 175 array_geometry[i].c[dim] -= center; | 
| 183 } | 176 } | 
| 184 } | 177 } | 
| 185 return array_geometry; | 178 return array_geometry; | 
| 186 } | 179 } | 
| 187 | 180 | 
| 181 Point PairDirection(const Point& a, const Point& b) { | |
| 182 return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()}; | |
| 183 } | |
| 184 | |
| 185 float DotProduct(const Point& a, const Point& b) { | |
| 186 return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); | |
| 187 } | |
| 188 | |
| 189 bool IsGeometryLinear(std::vector<Point> array_geometry) { | |
| 
 
Andrew MacDonald
2015/10/21 03:27:43
DCHECK_GT(array_geometry.size(), 1);
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | |
| 190 const float kMinDotProduct = 0.9999f; | |
| 191 const float kMinNorm = 1e-6f; | |
| 192 const Point first_pair_direction = | |
| 193 PairDirection(array_geometry[0], array_geometry[1]); | |
| 194 float norm_a = std::max( | |
| 195 kMinNorm, DotProduct(first_pair_direction, first_pair_direction)); | |
| 196 for (size_t i = 2u; i < array_geometry.size(); ++i) { | |
| 197 const Point pair_direction = | |
| 198 PairDirection(array_geometry[i - 1], array_geometry[i]); | |
| 199 float norm_b = | |
| 
 
Andrew MacDonald
2015/10/21 03:27:43
const
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | |
| 200 std::max(kMinNorm, DotProduct(pair_direction, pair_direction)); | |
| 201 if (std::abs(DotProduct(first_pair_direction, pair_direction) / | |
| 202 std::sqrt(norm_a * norm_b)) > kMinDotProduct) { | |
| 203 return false; | |
| 204 } | |
| 205 } | |
| 206 return true; | |
| 207 } | |
| 208 | |
| 209 Point AzimuthToPoint(float azimuth) { | |
| 210 return Point(std::cos(azimuth), std::sin(azimuth), 0.f); | |
| 211 } | |
| 212 | |
| 188 } // namespace | 213 } // namespace | 
| 189 | 214 | 
| 190 // static | 215 // static | 
| 191 const size_t NonlinearBeamformer::kNumFreqBins; | 216 const size_t NonlinearBeamformer::kNumFreqBins; | 
| 192 | 217 | 
| 193 NonlinearBeamformer::NonlinearBeamformer( | 218 NonlinearBeamformer::NonlinearBeamformer( | 
| 194 const std::vector<Point>& array_geometry) | 219 const std::vector<Point>& array_geometry, | 
| 220 SphericalPointf target_direction) | |
| 195 : num_input_channels_(array_geometry.size()), | 221 : num_input_channels_(array_geometry.size()), | 
| 196 array_geometry_(GetCenteredArray(array_geometry)), | 222 array_geometry_(GetCenteredArray(array_geometry)), | 
| 197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { | 223 min_mic_spacing_(GetMinimumSpacing(array_geometry)), | 
| 224 target_angle_radians_(target_direction.azimuth()) { | |
| 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 225 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 
| 199 } | 226 } | 
| 200 | 227 | 
| 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 228 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 
| 202 chunk_length_ = | 229 chunk_length_ = | 
| 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 230 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 
| 204 sample_rate_hz_ = sample_rate_hz; | 231 sample_rate_hz_ = sample_rate_hz; | 
| 205 InitFrequencyCorrectionRanges(); | |
| 206 | 232 | 
| 207 high_pass_postfilter_mask_ = 1.f; | 233 high_pass_postfilter_mask_ = 1.f; | 
| 208 is_target_present_ = false; | 234 is_target_present_ = false; | 
| 209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 235 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 
| 210 interference_blocks_count_ = hold_target_blocks_; | 236 interference_blocks_count_ = hold_target_blocks_; | 
| 211 | 237 | 
| 212 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 238 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 
| 213 1, | 239 1, | 
| 214 chunk_length_, | 240 chunk_length_, | 
| 215 window_, | 241 window_, | 
| 216 kFftSize, | 242 kFftSize, | 
| 217 kFftSize / 2, | 243 kFftSize / 2, | 
| 218 this)); | 244 this)); | 
| 219 for (size_t i = 0; i < kNumFreqBins; ++i) { | 245 for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| 220 time_smooth_mask_[i] = 1.f; | 246 time_smooth_mask_[i] = 1.f; | 
| 221 final_mask_[i] = 1.f; | 247 final_mask_[i] = 1.f; | 
| 222 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 248 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 
| 223 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 249 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 
| 224 } | 250 } | 
| 225 | 251 | 
| 226 // Initialize all nonadaptive values before looping through the frames. | 252 InitLowFrequencyCorrectionRanges(); | 
| 227 InitInterfAngles(); | 253 InitDiffuseCovMats(); | 
| 228 InitDelaySumMasks(); | 254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); | 
| 229 InitTargetCovMats(); | |
| 230 InitInterfCovMats(); | |
| 231 | |
| 232 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
| 233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | |
| 234 rpsiws_[i].clear(); | |
| 235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | |
| 236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | |
| 237 } | |
| 238 } | |
| 239 } | 255 } | 
| 240 | 256 | 
| 241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() { | 257 // These bin indexes determine the regions over which a mean is taken. This is | 
| 258 // applied as a constant value over the adjacent end "frequency correction" | |
| 259 // regions. | |
| 260 // | |
| 261 // low_mean_start_bin_ high_mean_start_bin_ | |
| 262 // v v constant | |
| 263 // |----------------|--------|----------------|-------|----------------| | |
| 264 // constant ^ ^ | |
| 265 // low_mean_end_bin_ high_mean_end_bin_ | |
| 266 // | |
| 267 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { | |
| 268 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
| 269 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
| 270 | |
| 271 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
| 272 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
| 273 } | |
| 274 | |
| 275 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { | |
| 242 const float kAliasingFreqHz = | 276 const float kAliasingFreqHz = | 
| 243 kSpeedOfSoundMeterSeconds / | 277 kSpeedOfSoundMeterSeconds / | 
| 244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); | 278 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); | 
| 245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | 279 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | 
| 246 sample_rate_hz_ / 2.f); | 280 sample_rate_hz_ / 2.f); | 
| 247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | 281 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | 
| 248 sample_rate_hz_ / 2.f); | 282 sample_rate_hz_ / 2.f); | 
| 249 | |
| 250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
| 251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
| 252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 283 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 
| 253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 284 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 
| 254 // These bin indexes determine the regions over which a mean is taken. This | 285 | 
| 255 // is applied as a constant value over the adjacent end "frequency correction" | |
| 256 // regions. | |
| 257 // | |
| 258 // low_mean_start_bin_ high_mean_start_bin_ | |
| 259 // v v constant | |
| 260 // |----------------|--------|----------------|-------|----------------| | |
| 261 // constant ^ ^ | |
| 262 // low_mean_end_bin_ high_mean_end_bin_ | |
| 263 // | |
| 264 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
| 265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
| 266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 286 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 
| 267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 287 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 
| 268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 288 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 
| 269 } | 289 } | 
| 270 | 290 | 
| 271 | |
| 272 void NonlinearBeamformer::InitInterfAngles() { | 291 void NonlinearBeamformer::InitInterfAngles() { | 
| 
 
Andrew MacDonald
2015/10/21 03:27:43
This looks good, but it's probably complicated eno
 
aluebs-webrtc
2015/10/27 18:08:15
Good point! :)
And the great part about writing un
 
 | |
| 273 const float kAwayRadians = | 292 const float kAwayRadians = | 
| 274 std::min(static_cast<float>(M_PI), | 293 std::min(static_cast<float>(M_PI), | 
| 275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / | 294 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / | 
| 276 min_mic_spacing_)); | 295 min_mic_spacing_)); | 
| 277 | 296 | 
| 278 interf_angles_radians_.clear(); | 297 interf_angles_radians_.clear(); | 
| 279 // TODO(aluebs): When the target angle is settable, make sure the interferer | 298 if (IsGeometryLinear(array_geometry_)) { | 
| 280 // scenarios aren't reflected over the target one for linear geometries. | 299 const Point array_direction = | 
| 281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); | 300 PairDirection(array_geometry_[0], array_geometry_[1]); | 
| 282 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); | 301 const Point array_normal(array_direction.y(), -array_direction.x(), 0.f); | 
| 302 const Point target_direction = AzimuthToPoint(target_angle_radians_); | |
| 303 const Point clockwise_interf_direction = | |
| 304 AzimuthToPoint(target_angle_radians_ - kAwayRadians); | |
| 305 if (DotProduct(array_normal, target_direction) * | |
| 
 
Andrew MacDonald
2015/10/21 03:27:43
Add some comments to each of these conditions, bec
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | |
| 306 DotProduct(array_normal, clockwise_interf_direction) >= | |
| 307 0.f) { | |
| 308 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | |
| 309 } else { | |
| 310 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians + | |
| 311 M_PI); | |
| 312 } | |
| 313 const Point counterclock_interf_direction = | |
| 314 AzimuthToPoint(target_angle_radians_ + kAwayRadians); | |
| 315 if (DotProduct(array_normal, target_direction) * | |
| 316 DotProduct(array_normal, counterclock_interf_direction) >= | |
| 317 0.f) { | |
| 318 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | |
| 319 } else { | |
| 320 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians - | |
| 321 M_PI); | |
| 322 } | |
| 323 } else { | |
| 324 interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | |
| 325 interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | |
| 326 } | |
| 283 } | 327 } | 
| 284 | 328 | 
| 285 void NonlinearBeamformer::InitDelaySumMasks() { | 329 void NonlinearBeamformer::InitDelaySumMasks() { | 
| 286 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 330 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 
| 287 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 331 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 
| 288 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 332 CovarianceMatrixGenerator::PhaseAlignmentMasks( | 
| 289 kFftSize, | 333 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, | 
| 290 sample_rate_hz_, | 334 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); | 
| 291 kSpeedOfSoundMeterSeconds, | |
| 292 array_geometry_, | |
| 293 kTargetAngleRadians, | |
| 294 &delay_sum_masks_[f_ix]); | |
| 295 | 335 | 
| 296 complex_f norm_factor = sqrt( | 336 complex_f norm_factor = sqrt( | 
| 297 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 337 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 
| 298 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 338 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 
| 299 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 339 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 
| 300 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 340 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 
| 301 normalized_delay_sum_masks_[f_ix])); | 341 normalized_delay_sum_masks_[f_ix])); | 
| 302 } | 342 } | 
| 303 } | 343 } | 
| 304 | 344 | 
| 305 void NonlinearBeamformer::InitTargetCovMats() { | 345 void NonlinearBeamformer::InitTargetCovMats() { | 
| 306 for (size_t i = 0; i < kNumFreqBins; ++i) { | 346 for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| 307 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 347 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 
| 308 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 348 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 
| 309 } | 349 } | 
| 310 } | 350 } | 
| 311 | 351 | 
| 352 void NonlinearBeamformer::InitDiffuseCovMats() { | |
| 353 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
| 354 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); | |
| 355 CovarianceMatrixGenerator::UniformCovarianceMatrix( | |
| 356 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); | |
| 357 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; | |
| 358 uniform_cov_mat_[i].Scale(1.f / normalization_factor); | |
| 359 uniform_cov_mat_[i].Scale(1 - kBalance); | |
| 360 } | |
| 361 } | |
| 362 | |
| 312 void NonlinearBeamformer::InitInterfCovMats() { | 363 void NonlinearBeamformer::InitInterfCovMats() { | 
| 313 for (size_t i = 0; i < kNumFreqBins; ++i) { | 364 for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| 314 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | |
| 315 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | |
| 316 array_geometry_, | |
| 317 &uniform_cov_mat); | |
| 318 complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; | |
| 319 uniform_cov_mat.Scale(1.f / normalization_factor); | |
| 320 uniform_cov_mat.Scale(1 - kBalance); | |
| 321 interf_cov_mats_[i].clear(); | 365 interf_cov_mats_[i].clear(); | 
| 322 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 366 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 
| 323 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, | 367 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, | 
| 324 num_input_channels_)); | 368 num_input_channels_)); | 
| 325 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | 369 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | 
| 326 CovarianceMatrixGenerator::AngledCovarianceMatrix( | 370 CovarianceMatrixGenerator::AngledCovarianceMatrix( | 
| 327 kSpeedOfSoundMeterSeconds, | 371 kSpeedOfSoundMeterSeconds, | 
| 328 interf_angles_radians_[j], | 372 interf_angles_radians_[j], | 
| 329 i, | 373 i, | 
| 330 kFftSize, | 374 kFftSize, | 
| 331 kNumFreqBins, | 375 kNumFreqBins, | 
| 332 sample_rate_hz_, | 376 sample_rate_hz_, | 
| 333 array_geometry_, | 377 array_geometry_, | 
| 334 &angled_cov_mat); | 378 &angled_cov_mat); | 
| 335 // Normalize matrices before averaging them. | 379 // Normalize matrices before averaging them. | 
| 336 normalization_factor = angled_cov_mat.elements()[0][0]; | 380 complex_f normalization_factor = angled_cov_mat.elements()[0][0]; | 
| 337 angled_cov_mat.Scale(1.f / normalization_factor); | 381 angled_cov_mat.Scale(1.f / normalization_factor); | 
| 338 // Weighted average of matrices. | 382 // Weighted average of matrices. | 
| 339 angled_cov_mat.Scale(kBalance); | 383 angled_cov_mat.Scale(kBalance); | 
| 340 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); | 384 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); | 
| 341 } | 385 } | 
| 342 } | 386 } | 
| 343 } | 387 } | 
| 388 | |
| 389 void NonlinearBeamformer::NormalizeCovMats() { | |
| 390 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
| 391 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | |
| 392 rpsiws_[i].clear(); | |
| 393 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | |
| 394 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | |
| 395 } | |
| 396 } | |
| 397 } | |
| 344 | 398 | 
| 345 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 399 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 
| 346 ChannelBuffer<float>* output) { | 400 ChannelBuffer<float>* output) { | 
| 347 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 401 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 
| 348 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 402 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 
| 349 | 403 | 
| 350 float old_high_pass_mask = high_pass_postfilter_mask_; | 404 float old_high_pass_mask = high_pass_postfilter_mask_; | 
| 351 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 405 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 
| 352 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 406 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 
| 353 // discontinuities. | 407 // discontinuities. | 
| 354 const float ramp_increment = | 408 const float ramp_increment = | 
| 355 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 409 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 
| 356 input.num_frames_per_band(); | 410 input.num_frames_per_band(); | 
| 357 // Apply delay and sum and post-filter in the time domain. WARNING: only works | 411 // Apply delay and sum and post-filter in the time domain. WARNING: only works | 
| 358 // because delay-and-sum is not frequency dependent. | 412 // because delay-and-sum is not frequency dependent. | 
| 359 for (size_t i = 1; i < input.num_bands(); ++i) { | 413 for (size_t i = 1; i < input.num_bands(); ++i) { | 
| 360 float smoothed_mask = old_high_pass_mask; | 414 float smoothed_mask = old_high_pass_mask; | 
| 361 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { | 415 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { | 
| 362 smoothed_mask += ramp_increment; | 416 smoothed_mask += ramp_increment; | 
| 363 | 417 | 
| 364 // Applying the delay and sum (at zero degrees, this is equivalent to | 418 // Applying the delay and sum (at zero degrees, this is equivalent to | 
| 365 // averaging). | 419 // averaging). | 
| 
 
Andrew MacDonald
2015/10/22 01:43:32
CRITICAL ;-)
Just noticed this because I'm lookin
 
Andrew MacDonald
2015/10/22 01:48:55
I think the Matlab code does the delay-and-sum fil
 
Andrew MacDonald
2015/10/22 01:53:03
Ah, that doesn't make any sense. This is for the u
 
aluebs-webrtc
2015/10/27 18:08:15
As you said, this is only for the higher bands, so
 
Andrew MacDonald
2015/10/28 01:57:56
This looks good, assuming the effect is as negligi
 
aluebs-webrtc
2015/10/29 00:34:21
I did confirm that I couldn't hear the difference.
 
 | |
| 366 float sum = 0.f; | 420 float sum = 0.f; | 
| 367 for (int k = 0; k < input.num_channels(); ++k) { | 421 for (int k = 0; k < input.num_channels(); ++k) { | 
| 368 sum += input.channels(i)[k][j]; | 422 sum += input.channels(i)[k][j]; | 
| 369 } | 423 } | 
| 370 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; | 424 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; | 
| 371 } | 425 } | 
| 372 } | 426 } | 
| 373 } | 427 } | 
| 374 | 428 | 
| 429 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { | |
| 430 target_angle_radians_ = target_direction.azimuth(); | |
| 431 InitHighFrequencyCorrectionRanges(); | |
| 432 InitInterfAngles(); | |
| 433 InitDelaySumMasks(); | |
| 434 InitTargetCovMats(); | |
| 435 InitInterfCovMats(); | |
| 436 NormalizeCovMats(); | |
| 437 } | |
| 438 | |
| 375 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 439 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 
| 376 // If more than half-beamwidth degrees away from the beam's center, | 440 // If more than half-beamwidth degrees away from the beam's center, | 
| 377 // you are out of the beam. | 441 // you are out of the beam. | 
| 378 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < | 442 return fabs(spherical_point.azimuth() - target_angle_radians_) < | 
| 379 kHalfBeamWidthRadians; | 443 kHalfBeamWidthRadians; | 
| 380 } | 444 } | 
| 381 | 445 | 
| 382 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, | 446 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, | 
| 383 int num_input_channels, | 447 int num_input_channels, | 
| 384 size_t num_freq_bins, | 448 size_t num_freq_bins, | 
| 385 int num_output_channels, | 449 int num_output_channels, | 
| 386 complex_f* const* output) { | 450 complex_f* const* output) { | 
| 387 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); | 451 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); | 
| 388 RTC_CHECK_EQ(num_input_channels, num_input_channels_); | 452 RTC_CHECK_EQ(num_input_channels, num_input_channels_); | 
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 533 new_mask_ + high_mean_end_bin_ + 1); | 597 new_mask_ + high_mean_end_bin_ + 1); | 
| 534 if (new_mask_[quantile] > kMaskTargetThreshold) { | 598 if (new_mask_[quantile] > kMaskTargetThreshold) { | 
| 535 is_target_present_ = true; | 599 is_target_present_ = true; | 
| 536 interference_blocks_count_ = 0; | 600 interference_blocks_count_ = 0; | 
| 537 } else { | 601 } else { | 
| 538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 602 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 
| 539 } | 603 } | 
| 540 } | 604 } | 
| 541 | 605 | 
| 542 } // namespace webrtc | 606 } // namespace webrtc | 
| OLD | NEW |