| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator
.h" | 22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator
.h" |
| 23 | 23 |
| 24 namespace webrtc { | 24 namespace webrtc { |
| 25 namespace { | 25 namespace { |
| 26 | 26 |
| 27 // Alpha for the Kaiser Bessel Derived window. | 27 // Alpha for the Kaiser Bessel Derived window. |
| 28 const float kKbdAlpha = 1.5f; | 28 const float kKbdAlpha = 1.5f; |
| 29 | 29 |
| 30 const float kSpeedOfSoundMeterSeconds = 343; | 30 const float kSpeedOfSoundMeterSeconds = 343; |
| 31 | 31 |
| 32 // For both target and interference angles, PI / 2 is perpendicular to the | |
| 33 // microphone array, facing forwards. The positive direction goes | |
| 34 // counterclockwise. | |
| 35 // The angle at which we amplify sound. | |
| 36 // TODO(aluebs): Make the target angle dynamically settable. | |
| 37 const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | |
| 38 | |
| 39 // The minimum separation in radians between the target direction and an | 32 // The minimum separation in radians between the target direction and an |
| 40 // interferer scenario. | 33 // interferer scenario. |
| 41 const float kMinAwayRadians = 0.2f; | 34 const float kMinAwayRadians = 0.2f; |
| 42 | 35 |
| 43 // The separation between the target direction and the closest interferer | 36 // The separation between the target direction and the closest interferer |
| 44 // scenario is proportional to this constant. | 37 // scenario is proportional to this constant. |
| 45 const float kAwaySlope = 0.008f; | 38 const float kAwaySlope = 0.008f; |
| 46 | 39 |
| 47 // When calculating the interference covariance matrix, this is the weight for | 40 // When calculating the interference covariance matrix, this is the weight for |
| 48 // the weighted average between the uniform covariance matrix and the angled | 41 // the weighted average between the uniform covariance matrix and the angled |
| 49 // covariance matrix. | 42 // covariance matrix. |
| 50 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) |
| 51 const float kBalance = 0.95f; | 44 const float kBalance = 0.95f; |
| 52 | 45 |
| 53 const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | |
| 54 | |
| 55 // Alpha coefficients for mask smoothing. | 46 // Alpha coefficients for mask smoothing. |
| 56 const float kMaskTimeSmoothAlpha = 0.2f; | 47 const float kMaskTimeSmoothAlpha = 0.2f; |
| 57 const float kMaskFrequencySmoothAlpha = 0.6f; | 48 const float kMaskFrequencySmoothAlpha = 0.6f; |
| 58 | 49 |
| 59 // The average mask is computed from masks in this mid-frequency range. If these | 50 // The average mask is computed from masks in this mid-frequency range. If these |
| 60 // ranges are changed |kMaskQuantile| might need to be adjusted. | 51 // ranges are changed |kMaskQuantile| might need to be adjusted. |
| 61 const int kLowMeanStartHz = 200; | 52 const int kLowMeanStartHz = 200; |
| 62 const int kLowMeanEndHz = 400; | 53 const int kLowMeanEndHz = 400; |
| 63 | 54 |
| 64 // Range limiter for subtractive terms in the nominator and denominator of the | 55 // Range limiter for subtractive terms in the nominator and denominator of the |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 center /= array_geometry.size(); | 171 center /= array_geometry.size(); |
| 181 for (size_t i = 0; i < array_geometry.size(); ++i) { | 172 for (size_t i = 0; i < array_geometry.size(); ++i) { |
| 182 array_geometry[i].c[dim] -= center; | 173 array_geometry[i].c[dim] -= center; |
| 183 } | 174 } |
| 184 } | 175 } |
| 185 return array_geometry; | 176 return array_geometry; |
| 186 } | 177 } |
| 187 | 178 |
| 188 } // namespace | 179 } // namespace |
| 189 | 180 |
| 181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); |
| 182 |
| 190 // static | 183 // static |
| 191 const size_t NonlinearBeamformer::kNumFreqBins; | 184 const size_t NonlinearBeamformer::kNumFreqBins; |
| 192 | 185 |
| 193 NonlinearBeamformer::NonlinearBeamformer( | 186 NonlinearBeamformer::NonlinearBeamformer( |
| 194 const std::vector<Point>& array_geometry) | 187 const std::vector<Point>& array_geometry, |
| 188 SphericalPointf target_direction) |
| 195 : num_input_channels_(array_geometry.size()), | 189 : num_input_channels_(array_geometry.size()), |
| 196 array_geometry_(GetCenteredArray(array_geometry)), | 190 array_geometry_(GetCenteredArray(array_geometry)), |
| 197 min_mic_spacing_(GetMinimumSpacing(array_geometry)) { | 191 array_normal_(GetArrayNormalIfExists(array_geometry)), |
| 192 min_mic_spacing_(GetMinimumSpacing(array_geometry)), |
| 193 target_angle_radians_(target_direction.azimuth()), |
| 194 away_radians_(std::min( |
| 195 static_cast<float>(M_PI), |
| 196 std::max(kMinAwayRadians, |
| 197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { |
| 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
| 199 } | 199 } |
| 200 | 200 |
| 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| 202 chunk_length_ = | 202 chunk_length_ = |
| 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
| 204 sample_rate_hz_ = sample_rate_hz; | 204 sample_rate_hz_ = sample_rate_hz; |
| 205 InitFrequencyCorrectionRanges(); | |
| 206 | 205 |
| 207 high_pass_postfilter_mask_ = 1.f; | 206 high_pass_postfilter_mask_ = 1.f; |
| 208 is_target_present_ = false; | 207 is_target_present_ = false; |
| 209 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
| 210 interference_blocks_count_ = hold_target_blocks_; | 209 interference_blocks_count_ = hold_target_blocks_; |
| 211 | 210 |
| 212 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 211 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
| 213 1, | 212 1, |
| 214 chunk_length_, | 213 chunk_length_, |
| 215 window_, | 214 window_, |
| 216 kFftSize, | 215 kFftSize, |
| 217 kFftSize / 2, | 216 kFftSize / 2, |
| 218 this)); | 217 this)); |
| 219 for (size_t i = 0; i < kNumFreqBins; ++i) { | 218 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 220 time_smooth_mask_[i] = 1.f; | 219 time_smooth_mask_[i] = 1.f; |
| 221 final_mask_[i] = 1.f; | 220 final_mask_[i] = 1.f; |
| 222 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
| 223 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
| 224 } | 223 } |
| 225 | 224 |
| 226 // Initialize all nonadaptive values before looping through the frames. | 225 InitLowFrequencyCorrectionRanges(); |
| 227 InitInterfAngles(); | 226 InitDiffuseCovMats(); |
| 228 InitDelaySumMasks(); | 227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); |
| 229 InitTargetCovMats(); | |
| 230 InitInterfCovMats(); | |
| 231 | |
| 232 for (size_t i = 0; i < kNumFreqBins; ++i) { | |
| 233 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | |
| 234 rpsiws_[i].clear(); | |
| 235 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | |
| 236 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | |
| 237 } | |
| 238 } | |
| 239 } | 228 } |
| 240 | 229 |
| 241 void NonlinearBeamformer::InitFrequencyCorrectionRanges() { | 230 // These bin indexes determine the regions over which a mean is taken. This is |
| 231 // applied as a constant value over the adjacent end "frequency correction" |
| 232 // regions. |
| 233 // |
| 234 // low_mean_start_bin_ high_mean_start_bin_ |
| 235 // v v constant |
| 236 // |----------------|--------|----------------|-------|----------------| |
| 237 // constant ^ ^ |
| 238 // low_mean_end_bin_ high_mean_end_bin_ |
| 239 // |
| 240 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { |
| 241 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); |
| 242 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); |
| 243 |
| 244 RTC_DCHECK_GT(low_mean_start_bin_, 0U); |
| 245 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
| 246 } |
| 247 |
| 248 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { |
| 242 const float kAliasingFreqHz = | 249 const float kAliasingFreqHz = |
| 243 kSpeedOfSoundMeterSeconds / | 250 kSpeedOfSoundMeterSeconds / |
| 244 (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); | 251 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); |
| 245 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | 252 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, |
| 246 sample_rate_hz_ / 2.f); | 253 sample_rate_hz_ / 2.f); |
| 247 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | 254 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, |
| 248 sample_rate_hz_ / 2.f); | 255 sample_rate_hz_ / 2.f); |
| 249 | |
| 250 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | |
| 251 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | |
| 252 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 256 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); |
| 253 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 257 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); |
| 254 // These bin indexes determine the regions over which a mean is taken. This | 258 |
| 255 // is applied as a constant value over the adjacent end "frequency correction" | |
| 256 // regions. | |
| 257 // | |
| 258 // low_mean_start_bin_ high_mean_start_bin_ | |
| 259 // v v constant | |
| 260 // |----------------|--------|----------------|-------|----------------| | |
| 261 // constant ^ ^ | |
| 262 // low_mean_end_bin_ high_mean_end_bin_ | |
| 263 // | |
| 264 RTC_DCHECK_GT(low_mean_start_bin_, 0U); | |
| 265 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | |
| 266 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 259 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
| 267 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 260 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
| 268 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 261 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
| 269 } | 262 } |
| 270 | 263 |
| 271 | |
| 272 void NonlinearBeamformer::InitInterfAngles() { | 264 void NonlinearBeamformer::InitInterfAngles() { |
| 273 const float kAwayRadians = | |
| 274 std::min(static_cast<float>(M_PI), | |
| 275 std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) / | |
| 276 min_mic_spacing_)); | |
| 277 | |
| 278 interf_angles_radians_.clear(); | 265 interf_angles_radians_.clear(); |
| 279 // TODO(aluebs): When the target angle is settable, make sure the interferer | 266 const Point target_direction = AzimuthToPoint(target_angle_radians_); |
| 280 // scenarios aren't reflected over the target one for linear geometries. | 267 const Point clockwise_interf_direction = |
| 281 interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); | 268 AzimuthToPoint(target_angle_radians_ - away_radians_); |
| 282 interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); | 269 if (!array_normal_ || |
| 270 DotProduct(*array_normal_, target_direction) * |
| 271 DotProduct(*array_normal_, clockwise_interf_direction) >= |
| 272 0.f) { |
| 273 // The target and clockwise interferer are in the same half-plane defined |
| 274 // by the array. |
| 275 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_); |
| 276 } else { |
| 277 // Otherwise, the interferer will begin reflecting back at the target. |
| 278 // Instead rotate it away 180 degrees. |
| 279 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ + |
| 280 M_PI); |
| 281 } |
| 282 const Point counterclock_interf_direction = |
| 283 AzimuthToPoint(target_angle_radians_ + away_radians_); |
| 284 if (!array_normal_ || |
| 285 DotProduct(*array_normal_, target_direction) * |
| 286 DotProduct(*array_normal_, counterclock_interf_direction) >= |
| 287 0.f) { |
| 288 // The target and counter-clockwise interferer are in the same half-plane |
| 289 // defined by the array. |
| 290 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_); |
| 291 } else { |
| 292 // Otherwise, the interferer will begin reflecting back at the target. |
| 293 // Instead rotate it away 180 degrees. |
| 294 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ - |
| 295 M_PI); |
| 296 } |
| 283 } | 297 } |
| 284 | 298 |
| 285 void NonlinearBeamformer::InitDelaySumMasks() { | 299 void NonlinearBeamformer::InitDelaySumMasks() { |
| 286 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 300 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| 287 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 301 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); |
| 288 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 302 CovarianceMatrixGenerator::PhaseAlignmentMasks( |
| 289 kFftSize, | 303 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, |
| 290 sample_rate_hz_, | 304 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); |
| 291 kSpeedOfSoundMeterSeconds, | |
| 292 array_geometry_, | |
| 293 kTargetAngleRadians, | |
| 294 &delay_sum_masks_[f_ix]); | |
| 295 | 305 |
| 296 complex_f norm_factor = sqrt( | 306 complex_f norm_factor = sqrt( |
| 297 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 307 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
| 298 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 308 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
| 299 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 309 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
| 300 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 310 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
| 301 normalized_delay_sum_masks_[f_ix])); | 311 normalized_delay_sum_masks_[f_ix])); |
| 302 } | 312 } |
| 303 } | 313 } |
| 304 | 314 |
| 305 void NonlinearBeamformer::InitTargetCovMats() { | 315 void NonlinearBeamformer::InitTargetCovMats() { |
| 306 for (size_t i = 0; i < kNumFreqBins; ++i) { | 316 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 307 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 317 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
| 308 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 318 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); |
| 309 } | 319 } |
| 310 } | 320 } |
| 311 | 321 |
| 322 void NonlinearBeamformer::InitDiffuseCovMats() { |
| 323 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 324 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); |
| 325 CovarianceMatrixGenerator::UniformCovarianceMatrix( |
| 326 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); |
| 327 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; |
| 328 uniform_cov_mat_[i].Scale(1.f / normalization_factor); |
| 329 uniform_cov_mat_[i].Scale(1 - kBalance); |
| 330 } |
| 331 } |
| 332 |
| 312 void NonlinearBeamformer::InitInterfCovMats() { | 333 void NonlinearBeamformer::InitInterfCovMats() { |
| 313 for (size_t i = 0; i < kNumFreqBins; ++i) { | 334 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 314 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | |
| 315 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | |
| 316 array_geometry_, | |
| 317 &uniform_cov_mat); | |
| 318 complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; | |
| 319 uniform_cov_mat.Scale(1.f / normalization_factor); | |
| 320 uniform_cov_mat.Scale(1 - kBalance); | |
| 321 interf_cov_mats_[i].clear(); | 335 interf_cov_mats_[i].clear(); |
| 322 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 336 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
| 323 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, | 337 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, |
| 324 num_input_channels_)); | 338 num_input_channels_)); |
| 325 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | 339 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); |
| 326 CovarianceMatrixGenerator::AngledCovarianceMatrix( | 340 CovarianceMatrixGenerator::AngledCovarianceMatrix( |
| 327 kSpeedOfSoundMeterSeconds, | 341 kSpeedOfSoundMeterSeconds, |
| 328 interf_angles_radians_[j], | 342 interf_angles_radians_[j], |
| 329 i, | 343 i, |
| 330 kFftSize, | 344 kFftSize, |
| 331 kNumFreqBins, | 345 kNumFreqBins, |
| 332 sample_rate_hz_, | 346 sample_rate_hz_, |
| 333 array_geometry_, | 347 array_geometry_, |
| 334 &angled_cov_mat); | 348 &angled_cov_mat); |
| 335 // Normalize matrices before averaging them. | 349 // Normalize matrices before averaging them. |
| 336 normalization_factor = angled_cov_mat.elements()[0][0]; | 350 complex_f normalization_factor = angled_cov_mat.elements()[0][0]; |
| 337 angled_cov_mat.Scale(1.f / normalization_factor); | 351 angled_cov_mat.Scale(1.f / normalization_factor); |
| 338 // Weighted average of matrices. | 352 // Weighted average of matrices. |
| 339 angled_cov_mat.Scale(kBalance); | 353 angled_cov_mat.Scale(kBalance); |
| 340 interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); | 354 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); |
| 341 } | 355 } |
| 342 } | 356 } |
| 343 } | 357 } |
| 358 |
| 359 void NonlinearBeamformer::NormalizeCovMats() { |
| 360 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 361 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
| 362 rpsiws_[i].clear(); |
| 363 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
| 364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
| 365 } |
| 366 } |
| 367 } |
| 344 | 368 |
| 345 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
| 346 ChannelBuffer<float>* output) { | 370 ChannelBuffer<float>* output) { |
| 347 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); | 371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
| 348 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); | 372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
| 349 | 373 |
| 350 float old_high_pass_mask = high_pass_postfilter_mask_; | 374 float old_high_pass_mask = high_pass_postfilter_mask_; |
| 351 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| 352 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
| 353 // discontinuities. | 377 // discontinuities. |
| 354 const float ramp_increment = | 378 const float ramp_increment = |
| 355 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 379 (high_pass_postfilter_mask_ - old_high_pass_mask) / |
| 356 input.num_frames_per_band(); | 380 input.num_frames_per_band(); |
| 357 // Apply delay and sum and post-filter in the time domain. WARNING: only works | 381 // Apply the smoothed high-pass mask to the first channel of each band. |
| 358 // because delay-and-sum is not frequency dependent. | 382 // This can be done because the effct of the linear beamformer is negligible |
| 383 // compared to the post-filter. |
| 359 for (size_t i = 1; i < input.num_bands(); ++i) { | 384 for (size_t i = 1; i < input.num_bands(); ++i) { |
| 360 float smoothed_mask = old_high_pass_mask; | 385 float smoothed_mask = old_high_pass_mask; |
| 361 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { | 386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
| 362 smoothed_mask += ramp_increment; | 387 smoothed_mask += ramp_increment; |
| 363 | 388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; |
| 364 // Applying the delay and sum (at zero degrees, this is equivalent to | |
| 365 // averaging). | |
| 366 float sum = 0.f; | |
| 367 for (int k = 0; k < input.num_channels(); ++k) { | |
| 368 sum += input.channels(i)[k][j]; | |
| 369 } | |
| 370 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; | |
| 371 } | 389 } |
| 372 } | 390 } |
| 373 } | 391 } |
| 374 | 392 |
| 393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { |
| 394 target_angle_radians_ = target_direction.azimuth(); |
| 395 InitHighFrequencyCorrectionRanges(); |
| 396 InitInterfAngles(); |
| 397 InitDelaySumMasks(); |
| 398 InitTargetCovMats(); |
| 399 InitInterfCovMats(); |
| 400 NormalizeCovMats(); |
| 401 } |
| 402 |
| 375 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { |
| 376 // If more than half-beamwidth degrees away from the beam's center, | 404 // If more than half-beamwidth degrees away from the beam's center, |
| 377 // you are out of the beam. | 405 // you are out of the beam. |
| 378 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < | 406 return fabs(spherical_point.azimuth() - target_angle_radians_) < |
| 379 kHalfBeamWidthRadians; | 407 kHalfBeamWidthRadians; |
| 380 } | 408 } |
| 381 | 409 |
| 382 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, | 410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
| 383 int num_input_channels, | 411 int num_input_channels, |
| 384 size_t num_freq_bins, | 412 size_t num_freq_bins, |
| 385 int num_output_channels, | 413 int num_output_channels, |
| 386 complex_f* const* output) { | 414 complex_f* const* output) { |
| 387 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); | 415 RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); |
| 388 RTC_CHECK_EQ(num_input_channels, num_input_channels_); | 416 RTC_CHECK_EQ(num_input_channels, num_input_channels_); |
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 533 new_mask_ + high_mean_end_bin_ + 1); | 561 new_mask_ + high_mean_end_bin_ + 1); |
| 534 if (new_mask_[quantile] > kMaskTargetThreshold) { | 562 if (new_mask_[quantile] > kMaskTargetThreshold) { |
| 535 is_target_present_ = true; | 563 is_target_present_ = true; |
| 536 interference_blocks_count_ = 0; | 564 interference_blocks_count_ = 0; |
| 537 } else { | 565 } else { |
| 538 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 566 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
| 539 } | 567 } |
| 540 } | 568 } |
| 541 | 569 |
| 542 } // namespace webrtc | 570 } // namespace webrtc |
| OLD | NEW |