Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 112 | 112 |
| 113 complex<float> result = complex<float>(0.f, 0.f); | 113 complex<float> result = complex<float>(0.f, 0.f); |
| 114 for (int i = 0; i < lhs.num_columns(); ++i) { | 114 for (int i = 0; i < lhs.num_columns(); ++i) { |
| 115 result += conj(lhs_elements[0][i]) * rhs_elements[0][i]; | 115 result += conj(lhs_elements[0][i]) * rhs_elements[0][i]; |
| 116 } | 116 } |
| 117 | 117 |
| 118 return result; | 118 return result; |
| 119 } | 119 } |
| 120 | 120 |
| 121 // Works for positive numbers only. | 121 // Works for positive numbers only. |
| 122 int Round(float x) { | 122 size_t Round(float x) { |
| 123 return static_cast<int>(std::floor(x + 0.5f)); | 123 return static_cast<size_t>(std::floor(x + 0.5f)); |
| 124 } | 124 } |
| 125 | 125 |
| 126 // Calculates the sum of absolute values of a complex matrix. | 126 // Calculates the sum of absolute values of a complex matrix. |
| 127 float SumAbs(const ComplexMatrix<float>& mat) { | 127 float SumAbs(const ComplexMatrix<float>& mat) { |
| 128 float sum_abs = 0.f; | 128 float sum_abs = 0.f; |
| 129 const complex<float>* const* mat_els = mat.elements(); | 129 const complex<float>* const* mat_els = mat.elements(); |
| 130 for (int i = 0; i < mat.num_rows(); ++i) { | 130 for (int i = 0; i < mat.num_rows(); ++i) { |
| 131 for (int j = 0; j < mat.num_columns(); ++j) { | 131 for (int j = 0; j < mat.num_columns(); ++j) { |
| 132 sum_abs += std::abs(mat_els[i][j]); | 132 sum_abs += std::abs(mat_els[i][j]); |
| 133 } | 133 } |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 172 center /= array_geometry.size(); | 172 center /= array_geometry.size(); |
| 173 for (size_t i = 0; i < array_geometry.size(); ++i) { | 173 for (size_t i = 0; i < array_geometry.size(); ++i) { |
| 174 array_geometry[i].c[dim] -= center; | 174 array_geometry[i].c[dim] -= center; |
| 175 } | 175 } |
| 176 } | 176 } |
| 177 return array_geometry; | 177 return array_geometry; |
| 178 } | 178 } |
| 179 | 179 |
| 180 } // namespace | 180 } // namespace |
| 181 | 181 |
| 182 // static | |
| 183 const size_t NonlinearBeamformer::kNumFreqBins; | |
| 184 | |
| 182 NonlinearBeamformer::NonlinearBeamformer( | 185 NonlinearBeamformer::NonlinearBeamformer( |
| 183 const std::vector<Point>& array_geometry) | 186 const std::vector<Point>& array_geometry) |
| 184 : num_input_channels_(array_geometry.size()), | 187 : num_input_channels_(array_geometry.size()), |
| 185 array_geometry_(GetCenteredArray(array_geometry)) { | 188 array_geometry_(GetCenteredArray(array_geometry)) { |
| 186 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 189 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
| 187 } | 190 } |
| 188 | 191 |
| 189 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 192 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| 190 chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms); | 193 chunk_length_ = |
| 194 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | |
| 191 sample_rate_hz_ = sample_rate_hz; | 195 sample_rate_hz_ = sample_rate_hz; |
| 192 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | 196 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); |
| 193 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | 197 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); |
| 194 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 198 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); |
| 195 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 199 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); |
| 196 // These bin indexes determine the regions over which a mean is taken. This | 200 // These bin indexes determine the regions over which a mean is taken. This |
| 197 // is applied as a constant value over the adjacent end "frequency correction" | 201 // is applied as a constant value over the adjacent end "frequency correction" |
| 198 // regions. | 202 // regions. |
| 199 // | 203 // |
| 200 // low_mean_start_bin_ high_mean_start_bin_ | 204 // low_mean_start_bin_ high_mean_start_bin_ |
| 201 // v v constant | 205 // v v constant |
| 202 // |----------------|--------|----------------|-------|----------------| | 206 // |----------------|--------|----------------|-------|----------------| |
| 203 // constant ^ ^ | 207 // constant ^ ^ |
| 204 // low_mean_end_bin_ high_mean_end_bin_ | 208 // low_mean_end_bin_ high_mean_end_bin_ |
| 205 // | 209 // |
| 206 DCHECK_GT(low_mean_start_bin_, 0); | 210 DCHECK_GT(low_mean_start_bin_, 0U); |
| 207 DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | 211 DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
| 208 DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 212 DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
| 209 DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 213 DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
| 210 DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 214 DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
| 211 | 215 |
| 212 high_pass_postfilter_mask_ = 1.f; | 216 high_pass_postfilter_mask_ = 1.f; |
| 213 is_target_present_ = false; | 217 is_target_present_ = false; |
| 214 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | 218 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
| 215 interference_blocks_count_ = hold_target_blocks_; | 219 interference_blocks_count_ = hold_target_blocks_; |
| 216 | 220 |
| 217 | 221 |
| 218 lapped_transform_.reset(new LappedTransform(num_input_channels_, | 222 lapped_transform_.reset(new LappedTransform(num_input_channels_, |
| 219 1, | 223 1, |
| 220 chunk_length_, | 224 chunk_length_, |
| 221 window_, | 225 window_, |
| 222 kFftSize, | 226 kFftSize, |
| 223 kFftSize / 2, | 227 kFftSize / 2, |
| 224 this)); | 228 this)); |
| 225 for (int i = 0; i < kNumFreqBins; ++i) { | 229 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 226 time_smooth_mask_[i] = 1.f; | 230 time_smooth_mask_[i] = 1.f; |
| 227 final_mask_[i] = 1.f; | 231 final_mask_[i] = 1.f; |
| 228 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; | 232 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
| 229 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 233 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
| 230 mask_thresholds_[i] = num_input_channels_ * num_input_channels_ * | 234 mask_thresholds_[i] = num_input_channels_ * num_input_channels_ * |
| 231 kBeamwidthConstant * wave_numbers_[i] * | 235 kBeamwidthConstant * wave_numbers_[i] * |
| 232 wave_numbers_[i]; | 236 wave_numbers_[i]; |
| 233 } | 237 } |
| 234 | 238 |
| 235 // Initialize all nonadaptive values before looping through the frames. | 239 // Initialize all nonadaptive values before looping through the frames. |
| 236 InitDelaySumMasks(); | 240 InitDelaySumMasks(); |
| 237 InitTargetCovMats(); | 241 InitTargetCovMats(); |
| 238 InitInterfCovMats(); | 242 InitInterfCovMats(); |
| 239 | 243 |
| 240 for (int i = 0; i < kNumFreqBins; ++i) { | 244 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 241 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 245 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
| 242 rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]); | 246 rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]); |
| 243 reflected_rpsiws_[i] = | 247 reflected_rpsiws_[i] = |
| 244 Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); | 248 Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); |
| 245 } | 249 } |
| 246 } | 250 } |
| 247 | 251 |
| 248 void NonlinearBeamformer::InitDelaySumMasks() { | 252 void NonlinearBeamformer::InitDelaySumMasks() { |
| 249 for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 253 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| 250 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 254 delay_sum_masks_[f_ix].Resize(1, num_input_channels_); |
| 251 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 255 CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, |
| 252 kFftSize, | 256 kFftSize, |
| 253 sample_rate_hz_, | 257 sample_rate_hz_, |
| 254 kSpeedOfSoundMeterSeconds, | 258 kSpeedOfSoundMeterSeconds, |
| 255 array_geometry_, | 259 array_geometry_, |
| 256 kTargetAngleRadians, | 260 kTargetAngleRadians, |
| 257 &delay_sum_masks_[f_ix]); | 261 &delay_sum_masks_[f_ix]); |
| 258 | 262 |
| 259 complex_f norm_factor = sqrt( | 263 complex_f norm_factor = sqrt( |
| 260 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 264 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
| 261 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); | 265 delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
| 262 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); | 266 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
| 263 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( | 267 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
| 264 normalized_delay_sum_masks_[f_ix])); | 268 normalized_delay_sum_masks_[f_ix])); |
| 265 } | 269 } |
| 266 } | 270 } |
| 267 | 271 |
| 268 void NonlinearBeamformer::InitTargetCovMats() { | 272 void NonlinearBeamformer::InitTargetCovMats() { |
| 269 for (int i = 0; i < kNumFreqBins; ++i) { | 273 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 270 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 274 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
| 271 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); | 275 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); |
| 272 complex_f normalization_factor = target_cov_mats_[i].Trace(); | 276 complex_f normalization_factor = target_cov_mats_[i].Trace(); |
| 273 target_cov_mats_[i].Scale(1.f / normalization_factor); | 277 target_cov_mats_[i].Scale(1.f / normalization_factor); |
| 274 } | 278 } |
| 275 } | 279 } |
| 276 | 280 |
| 277 void NonlinearBeamformer::InitInterfCovMats() { | 281 void NonlinearBeamformer::InitInterfCovMats() { |
| 278 for (int i = 0; i < kNumFreqBins; ++i) { | 282 for (size_t i = 0; i < kNumFreqBins; ++i) { |
| 279 interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); | 283 interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); |
| 280 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | 284 ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); |
| 281 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); | 285 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); |
| 282 | 286 |
| 283 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | 287 CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], |
| 284 array_geometry_, | 288 array_geometry_, |
| 285 &uniform_cov_mat); | 289 &uniform_cov_mat); |
| 286 | 290 |
| 287 CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds, | 291 CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds, |
| 288 kInterfAngleRadians, | 292 kInterfAngleRadians, |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 313 | 317 |
| 314 float old_high_pass_mask = high_pass_postfilter_mask_; | 318 float old_high_pass_mask = high_pass_postfilter_mask_; |
| 315 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); | 319 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| 316 // Ramp up/down for smoothing. 1 mask per 10ms results in audible | 320 // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
| 317 // discontinuities. | 321 // discontinuities. |
| 318 const float ramp_increment = | 322 const float ramp_increment = |
| 319 (high_pass_postfilter_mask_ - old_high_pass_mask) / | 323 (high_pass_postfilter_mask_ - old_high_pass_mask) / |
| 320 input.num_frames_per_band(); | 324 input.num_frames_per_band(); |
| 321 // Apply delay and sum and post-filter in the time domain. WARNING: only works | 325 // Apply delay and sum and post-filter in the time domain. WARNING: only works |
| 322 // because delay-and-sum is not frequency dependent. | 326 // because delay-and-sum is not frequency dependent. |
| 323 for (int i = 1; i < input.num_bands(); ++i) { | 327 for (size_t i = 1; i < input.num_bands(); ++i) { |
| 324 float smoothed_mask = old_high_pass_mask; | 328 float smoothed_mask = old_high_pass_mask; |
| 325 for (int j = 0; j < input.num_frames_per_band(); ++j) { | 329 for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
| 326 smoothed_mask += ramp_increment; | 330 smoothed_mask += ramp_increment; |
| 327 | 331 |
| 328 // Applying the delay and sum (at zero degrees, this is equivalent to | 332 // Applying the delay and sum (at zero degrees, this is equivalent to |
| 329 // averaging). | 333 // averaging). |
| 330 float sum = 0.f; | 334 float sum = 0.f; |
| 331 for (int k = 0; k < input.num_channels(); ++k) { | 335 for (int k = 0; k < input.num_channels(); ++k) { |
| 332 sum += input.channels(i)[k][j]; | 336 sum += input.channels(i)[k][j]; |
| 333 } | 337 } |
| 334 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; | 338 output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; |
| 335 } | 339 } |
| 336 } | 340 } |
| 337 } | 341 } |
| 338 | 342 |
| 339 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 343 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { |
| 340 // If more than half-beamwidth degrees away from the beam's center, | 344 // If more than half-beamwidth degrees away from the beam's center, |
| 341 // you are out of the beam. | 345 // you are out of the beam. |
| 342 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < | 346 return fabs(spherical_point.azimuth() - kTargetAngleRadians) < |
| 343 kHalfBeamWidthRadians; | 347 kHalfBeamWidthRadians; |
| 344 } | 348 } |
| 345 | 349 |
| 346 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, | 350 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
| 347 int num_input_channels, | 351 int num_input_channels, |
|
Andrew MacDonald
2015/07/24 04:01:43
and here
| |
| 348 int num_freq_bins, | 352 size_t num_freq_bins, |
| 349 int num_output_channels, | 353 int num_output_channels, |
| 350 complex_f* const* output) { | 354 complex_f* const* output) { |
| 351 CHECK_EQ(num_freq_bins, kNumFreqBins); | 355 CHECK_EQ(num_freq_bins, kNumFreqBins); |
| 352 CHECK_EQ(num_input_channels, num_input_channels_); | 356 CHECK_EQ(num_input_channels, num_input_channels_); |
| 353 CHECK_EQ(num_output_channels, 1); | 357 CHECK_EQ(num_output_channels, 1); |
| 354 | 358 |
| 355 // Calculating the post-filter masks. Note that we need two for each | 359 // Calculating the post-filter masks. Note that we need two for each |
| 356 // frequency bin to account for the positive and negative interferer | 360 // frequency bin to account for the positive and negative interferer |
| 357 // angle. | 361 // angle. |
| 358 for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { | 362 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |
| 359 eig_m_.CopyFromColumn(input, i, num_input_channels_); | 363 eig_m_.CopyFromColumn(input, i, num_input_channels_); |
| 360 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); | 364 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); |
| 361 if (eig_m_norm_factor != 0.f) { | 365 if (eig_m_norm_factor != 0.f) { |
| 362 eig_m_.Scale(1.f / eig_m_norm_factor); | 366 eig_m_.Scale(1.f / eig_m_norm_factor); |
| 363 } | 367 } |
| 364 | 368 |
| 365 float rxim = Norm(target_cov_mats_[i], eig_m_); | 369 float rxim = Norm(target_cov_mats_[i], eig_m_); |
| 366 float ratio_rxiw_rxim = 0.f; | 370 float ratio_rxiw_rxim = 0.f; |
| 367 if (rxim > 0.f) { | 371 if (rxim > 0.f) { |
| 368 ratio_rxiw_rxim = rxiws_[i] / rxim; | 372 ratio_rxiw_rxim = rxiws_[i] / rxim; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 413 if (denominator > mask_threshold) { | 417 if (denominator > mask_threshold) { |
| 414 float lambda = numerator / denominator; | 418 float lambda = numerator / denominator; |
| 415 mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum); | 419 mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum); |
| 416 } | 420 } |
| 417 return mask; | 421 return mask; |
| 418 } | 422 } |
| 419 | 423 |
| 420 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, | 424 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
| 421 complex_f* const* output) { | 425 complex_f* const* output) { |
| 422 complex_f* output_channel = output[0]; | 426 complex_f* output_channel = output[0]; |
| 423 for (int f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 427 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| 424 output_channel[f_ix] = complex_f(0.f, 0.f); | 428 output_channel[f_ix] = complex_f(0.f, 0.f); |
| 425 | 429 |
| 426 const complex_f* delay_sum_mask_els = | 430 const complex_f* delay_sum_mask_els = |
| 427 normalized_delay_sum_masks_[f_ix].elements()[0]; | 431 normalized_delay_sum_masks_[f_ix].elements()[0]; |
| 428 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { | 432 for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
| 429 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; | 433 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
| 430 } | 434 } |
| 431 | 435 |
| 432 output_channel[f_ix] *= final_mask_[f_ix]; | 436 output_channel[f_ix] *= final_mask_[f_ix]; |
| 433 } | 437 } |
| 434 } | 438 } |
| 435 | 439 |
| 436 // Smooth new_mask_ into time_smooth_mask_. | 440 // Smooth new_mask_ into time_smooth_mask_. |
| 437 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { | 441 void NonlinearBeamformer::ApplyMaskTimeSmoothing() { |
| 438 for (int i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { | 442 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |
| 439 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + | 443 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + |
| 440 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; | 444 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; |
| 441 } | 445 } |
| 442 } | 446 } |
| 443 | 447 |
| 444 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. | 448 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency. |
| 445 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { | 449 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { |
| 446 // Smooth over frequency in both directions. The "frequency correction" | 450 // Smooth over frequency in both directions. The "frequency correction" |
| 447 // regions have constant value, but we enter them to smooth over the jump | 451 // regions have constant value, but we enter them to smooth over the jump |
| 448 // that exists at the boundary. However, this does mean when smoothing "away" | 452 // that exists at the boundary. However, this does mean when smoothing "away" |
| 449 // from the region that we only need to use the last element. | 453 // from the region that we only need to use the last element. |
| 450 // | 454 // |
| 451 // Upward smoothing: | 455 // Upward smoothing: |
| 452 // low_mean_start_bin_ | 456 // low_mean_start_bin_ |
| 453 // v | 457 // v |
| 454 // |------|------------|------| | 458 // |------|------------|------| |
| 455 // ^------------------>^ | 459 // ^------------------>^ |
| 456 // | 460 // |
| 457 // Downward smoothing: | 461 // Downward smoothing: |
| 458 // high_mean_end_bin_ | 462 // high_mean_end_bin_ |
| 459 // v | 463 // v |
| 460 // |------|------------|------| | 464 // |------|------------|------| |
| 461 // ^<------------------^ | 465 // ^<------------------^ |
| 462 std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_); | 466 std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_); |
| 463 for (int i = low_mean_start_bin_; i < kNumFreqBins; ++i) { | 467 for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) { |
| 464 final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] + | 468 final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] + |
| 465 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1]; | 469 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1]; |
| 466 } | 470 } |
| 467 for (int i = high_mean_end_bin_ + 1; i > 0; --i) { | 471 for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) { |
| 468 final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] + | 472 final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] + |
| 469 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i]; | 473 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i]; |
| 470 } | 474 } |
| 471 } | 475 } |
| 472 | 476 |
| 473 // Apply low frequency correction to time_smooth_mask_. | 477 // Apply low frequency correction to time_smooth_mask_. |
| 474 void NonlinearBeamformer::ApplyLowFrequencyCorrection() { | 478 void NonlinearBeamformer::ApplyLowFrequencyCorrection() { |
| 475 const float low_frequency_mask = | 479 const float low_frequency_mask = |
| 476 MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1); | 480 MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1); |
| 477 std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_, | 481 std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_, |
| 478 low_frequency_mask); | 482 low_frequency_mask); |
| 479 } | 483 } |
| 480 | 484 |
| 481 // Apply high frequency correction to time_smooth_mask_. Update | 485 // Apply high frequency correction to time_smooth_mask_. Update |
| 482 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands. | 486 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands. |
| 483 void NonlinearBeamformer::ApplyHighFrequencyCorrection() { | 487 void NonlinearBeamformer::ApplyHighFrequencyCorrection() { |
| 484 high_pass_postfilter_mask_ = | 488 high_pass_postfilter_mask_ = |
| 485 MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1); | 489 MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1); |
| 486 std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1, | 490 std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1, |
| 487 time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_); | 491 time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_); |
| 488 } | 492 } |
| 489 | 493 |
| 490 // Compute mean over the given range of time_smooth_mask_, [first, last). | 494 // Compute mean over the given range of time_smooth_mask_, [first, last). |
| 491 float NonlinearBeamformer::MaskRangeMean(int first, int last) { | 495 float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) { |
| 492 DCHECK_GT(last, first); | 496 DCHECK_GT(last, first); |
| 493 const float sum = std::accumulate(time_smooth_mask_ + first, | 497 const float sum = std::accumulate(time_smooth_mask_ + first, |
| 494 time_smooth_mask_ + last, 0.f); | 498 time_smooth_mask_ + last, 0.f); |
| 495 return sum / (last - first); | 499 return sum / (last - first); |
| 496 } | 500 } |
| 497 | 501 |
| 498 void NonlinearBeamformer::EstimateTargetPresence() { | 502 void NonlinearBeamformer::EstimateTargetPresence() { |
| 499 const int quantile = | 503 const size_t quantile = static_cast<size_t>( |
| 500 (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile + | 504 (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile + |
| 501 low_mean_start_bin_; | 505 low_mean_start_bin_); |
| 502 std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile, | 506 std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile, |
| 503 new_mask_ + high_mean_end_bin_ + 1); | 507 new_mask_ + high_mean_end_bin_ + 1); |
| 504 if (new_mask_[quantile] > kMaskTargetThreshold) { | 508 if (new_mask_[quantile] > kMaskTargetThreshold) { |
| 505 is_target_present_ = true; | 509 is_target_present_ = true; |
| 506 interference_blocks_count_ = 0; | 510 interference_blocks_count_ = 0; |
| 507 } else { | 511 } else { |
| 508 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; | 512 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; |
| 509 } | 513 } |
| 510 } | 514 } |
| 511 | 515 |
| 512 } // namespace webrtc | 516 } // namespace webrtc |
| OLD | NEW |