Chromium Code Reviews| Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| index 0f76dbf0c620bc97b88a878125c8f4b45083eb3e..83d1fe5c9f10b6add16c509f34886deee375e736 100644 |
| --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| @@ -29,13 +29,6 @@ const float kKbdAlpha = 1.5f; |
| const float kSpeedOfSoundMeterSeconds = 343; |
| -// For both target and interference angles, PI / 2 is perpendicular to the |
| -// microphone array, facing forwards. The positive direction goes |
| -// counterclockwise. |
| -// The angle at which we amplify sound. |
| -// TODO(aluebs): Make the target angle dynamically settable. |
| -const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; |
| - |
| // The minimum separation in radians between the target direction and a |
| // interferer scenario. |
| const float kMinAwayRadians = 0.2f; |
| @@ -199,16 +192,56 @@ float GetMinimumSpacing(std::vector<Point> array_geometry) { |
| return mic_spacing; |
| } |
| +float DotProduct(std::vector<float> a, std::vector<float> b) { |
|
Andrew MacDonald
2015/10/14 22:12:30
const references.
Andrew MacDonald
2015/10/20 03:00:08
Looks like you missed the remainder of the comment
aluebs-webrtc
2015/10/21 01:41:40
I totally missed them, sorry about that. Addressin
aluebs-webrtc
2015/10/21 01:41:40
Done.
|
| + RTC_DCHECK_EQ(a.size(), b.size()); |
| + float dot_product = 0.f; |
| + for (size_t i = 0u; i < a.size(); ++i) { |
| + dot_product += a[i] * b[i]; |
| + } |
| + return dot_product; |
| +} |
| + |
| +float NormalizedDotProduct(std::vector<float> a, std::vector<float> b) { |
|
Andrew MacDonald
2015/10/14 22:12:31
const references.
aluebs-webrtc
2015/10/21 01:41:40
Removed this function.
|
| + const float kMinNorm = 1e-6f; |
| + float norm_a = DotProduct(a, a); |
|
Andrew MacDonald
2015/10/14 22:12:30
You're repeatedly computing this for first_pair_di
aluebs-webrtc
2015/10/21 01:41:40
I removed the NormalizedDotProduct and did the nor
|
| + float norm_b = DotProduct(b, b); |
| + if (norm_a > kMinNorm && norm_b > kMinNorm) { |
| + return DotProduct(a, b) / std::sqrt(norm_a * norm_b); |
| + } else { |
| + return 0.f; |
| + } |
| +} |
| + |
| +bool IsGeometryLinear(std::vector<Point> array_geometry) { |
| + const float kMinDotProduct = 0.9999f; |
| + bool is_geometry_linear = true; |
| + std::vector<float> directiona; |
|
Andrew MacDonald
2015/10/14 22:12:31
first_pair_direction
aluebs-webrtc
2015/10/21 01:41:39
Done.
|
| + directiona.push_back(array_geometry[1].x() - array_geometry[0].x()); |
| + directiona.push_back(array_geometry[1].y() - array_geometry[0].y()); |
| + directiona.push_back(array_geometry[1].z() - array_geometry[0].z()); |
|
Andrew MacDonald
2015/10/14 22:12:31
Write a helper for this, something like:
std::vect
aluebs-webrtc
2015/10/21 01:41:40
I though about using Point, but I thought it was s
|
| + for (size_t i = 2u; i < array_geometry.size(); ++i) { |
| + std::vector<float> directionb; |
| + directionb.push_back(array_geometry[i].x() - array_geometry[i - 1].x()); |
|
Andrew MacDonald
2015/10/14 22:12:31
pair_direction
aluebs-webrtc
2015/10/21 01:41:40
Done.
|
| + directionb.push_back(array_geometry[i].y() - array_geometry[i - 1].y()); |
| + directionb.push_back(array_geometry[i].z() - array_geometry[i - 1].z()); |
| + is_geometry_linear &= |
| + std::abs(NormalizedDotProduct(directiona, directionb)) > kMinDotProduct; |
|
Andrew MacDonald
2015/10/14 22:12:31
Instead, short circuit:
if (std::abs(Normalized
aluebs-webrtc
2015/10/21 01:41:40
I believe that function with only one return state
|
| + } |
| + return is_geometry_linear; |
| +} |
| + |
| } // namespace |
| // static |
| const size_t NonlinearBeamformer::kNumFreqBins; |
| NonlinearBeamformer::NonlinearBeamformer( |
| - const std::vector<Point>& array_geometry) |
| + const std::vector<Point>& array_geometry, |
| + float target_angle_radians) |
| : num_input_channels_(array_geometry.size()), |
| array_geometry_(GetCenteredArray(array_geometry)), |
| - mic_spacing_(GetMinimumSpacing(array_geometry)) { |
| + mic_spacing_(GetMinimumSpacing(array_geometry)), |
| + target_angle_radians_(target_angle_radians) { |
| WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); |
| } |
| @@ -216,7 +249,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| chunk_length_ = |
| static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); |
| sample_rate_hz_ = sample_rate_hz; |
| - InitFrequencyCorrectionRanges(); |
| high_pass_postfilter_mask_ = 1.f; |
| is_target_present_ = false; |
| @@ -237,52 +269,45 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
| } |
| - // Initialize all nonadaptive values before looping through the frames. |
| - InitInterfAngles(); |
| - InitDelaySumMasks(); |
| - InitTargetCovMats(); |
| - InitInterfCovMats(); |
| + InitLowFrequencyCorrectionRanges(); |
| + InitDifuseCovMats(); |
|
Andrew MacDonald
2015/10/14 22:12:31
Diffuse
aluebs-webrtc
2015/10/21 01:41:40
Done.
|
| + SteerBeam(target_angle_radians_); |
| +} |
| - for (size_t i = 0; i < kNumFreqBins; ++i) { |
| - rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
| - rpsiws_[i].clear(); |
| - for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
| - rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
| - } |
| - } |
| +// These bin indexes determine the regions over which a mean is taken. This is |
| +// applied as a constant value over the adjacent end "frequency correction" |
| +// regions. |
| +// |
| +// low_mean_start_bin_ high_mean_start_bin_ |
| +// v v constant |
| +// |----------------|--------|----------------|-------|----------------| |
| +// constant ^ ^ |
| +// low_mean_end_bin_ high_mean_end_bin_ |
| +// |
| +void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { |
| + low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); |
| + low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); |
| + |
| + RTC_DCHECK_GT(low_mean_start_bin_, 0U); |
| + RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
| } |
| -void NonlinearBeamformer::InitFrequencyCorrectionRanges() { |
| +void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { |
| const float kAliasingFreqHz = |
| kSpeedOfSoundMeterSeconds / |
| - (mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); |
| + (mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); |
| const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, |
| sample_rate_hz_ / 2.f); |
| const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, |
| sample_rate_hz_ / 2.f); |
| - |
| - low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); |
| - low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); |
| high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); |
| high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); |
| - // These bin indexes determine the regions over which a mean is taken. This |
| - // is applied as a constant value over the adjacent end "frequency correction" |
| - // regions. |
| - // |
| - // low_mean_start_bin_ high_mean_start_bin_ |
| - // v v constant |
| - // |----------------|--------|----------------|-------|----------------| |
| - // constant ^ ^ |
| - // low_mean_end_bin_ high_mean_end_bin_ |
| - // |
| - RTC_DCHECK_GT(low_mean_start_bin_, 0U); |
| - RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); |
| + |
| RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); |
| RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); |
| RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); |
| } |
| - |
| void NonlinearBeamformer::InitInterfAngles() { |
| const float kAway = |
| std::min(static_cast<float>(M_PI), |
| @@ -290,10 +315,21 @@ void NonlinearBeamformer::InitInterfAngles() { |
| kAwaySlope * static_cast<float>(M_PI) / mic_spacing_)); |
| interf_angles_radians_.clear(); |
| - // TODO(aluebs): When the target angle is settable, make sure the interferer |
| - // scenarios aren't reflected over the target one for linear geometries. |
| - interf_angles_radians_.push_back(kTargetAngleRadians - kAway); |
| - interf_angles_radians_.push_back(kTargetAngleRadians + kAway); |
| + if (IsGeometryLinear(array_geometry_)) { |
| + if (target_angle_radians_ - kAway >= 0.f) { |
|
Andrew MacDonald
2015/10/14 22:12:30
These values depend on the array being parallel wi
aluebs-webrtc
2015/10/21 01:41:40
Great Point. Now I generalized it.
|
| + interf_angles_radians_.push_back(target_angle_radians_ - kAway); |
| + } else { |
| + interf_angles_radians_.push_back(M_PI); |
| + } |
| + if (target_angle_radians_ + kAway <= M_PI) { |
| + interf_angles_radians_.push_back(target_angle_radians_ + kAway); |
| + } else { |
| + interf_angles_radians_.push_back(0.f); |
| + } |
| + } else { |
| + interf_angles_radians_.push_back(target_angle_radians_ - kAway); |
| + interf_angles_radians_.push_back(target_angle_radians_ + kAway); |
| + } |
| } |
| void NonlinearBeamformer::InitDelaySumMasks() { |
| @@ -304,7 +340,7 @@ void NonlinearBeamformer::InitDelaySumMasks() { |
| sample_rate_hz_, |
| kSpeedOfSoundMeterSeconds, |
| array_geometry_, |
| - kTargetAngleRadians, |
| + target_angle_radians_, |
| &delay_sum_masks_[f_ix]); |
| complex_f norm_factor = sqrt( |
| @@ -323,15 +359,20 @@ void NonlinearBeamformer::InitTargetCovMats() { |
| } |
| } |
| -void NonlinearBeamformer::InitInterfCovMats() { |
| +void NonlinearBeamformer::InitDifuseCovMats() { |
| for (size_t i = 0; i < kNumFreqBins; ++i) { |
| - ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); |
| + uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); |
| CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], |
| array_geometry_, |
| - &uniform_cov_mat); |
| - complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; |
| - uniform_cov_mat.Scale(1.f / normalization_factor); |
| - uniform_cov_mat.Scale(1 - kBalance); |
| + &uniform_cov_mat_[i]); |
| + complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; |
| + uniform_cov_mat_[i].Scale(1.f / normalization_factor); |
| + uniform_cov_mat_[i].Scale(1 - kBalance); |
| + } |
| +} |
| + |
| +void NonlinearBeamformer::InitInterfCovMats() { |
| + for (size_t i = 0; i < kNumFreqBins; ++i) { |
| interf_cov_mats_[i].clear(); |
| for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
| interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, |
| @@ -347,11 +388,21 @@ void NonlinearBeamformer::InitInterfCovMats() { |
| array_geometry_, |
| &angled_cov_mat); |
| // Normalize matrices before averaging them. |
| - normalization_factor = angled_cov_mat.elements()[0][0]; |
| + complex_f normalization_factor = angled_cov_mat.elements()[0][0]; |
| angled_cov_mat.Scale(1.f / normalization_factor); |
| // Weighted average of matrices. |
| angled_cov_mat.Scale(kBalance); |
| - interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); |
| + interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); |
| + } |
| + } |
| +} |
| + |
| +void NonlinearBeamformer::NormalizeCovMats() { |
| + for (size_t i = 0; i < kNumFreqBins; ++i) { |
| + rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); |
| + rpsiws_[i].clear(); |
| + for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { |
| + rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); |
| } |
| } |
| } |
| @@ -386,10 +437,20 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
| } |
| } |
| +void NonlinearBeamformer::SteerBeam(float target_angle_radians) { |
| + target_angle_radians_ = target_angle_radians; |
| + InitHighFrequencyCorrectionRanges(); |
| + InitInterfAngles(); |
| + InitDelaySumMasks(); |
| + InitTargetCovMats(); |
| + InitInterfCovMats(); |
| + NormalizeCovMats(); |
| +} |
| + |
| bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { |
| // If more than half-beamwidth degrees away from the beam's center, |
| // you are out of the beam. |
| - return fabs(spherical_point.azimuth() - kTargetAngleRadians) < |
| + return fabs(spherical_point.azimuth() - target_angle_radians_) < |
| kHalfBeamWidthRadians; |
| } |