Chromium Code Reviews| Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc | 
| diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc | 
| index d3f9b33bc2e845bea4789e1b94ac66d0075699bb..70107204a380da539c289a250a01647e81c087a2 100644 | 
| --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc | 
| +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc | 
| @@ -29,13 +29,6 @@ const float kKbdAlpha = 1.5f; | 
| const float kSpeedOfSoundMeterSeconds = 343; | 
| -// For both target and interference angles, PI / 2 is perpendicular to the | 
| -// microphone array, facing forwards. The positive direction goes | 
| -// counterclockwise. | 
| -// The angle at which we amplify sound. | 
| -// TODO(aluebs): Make the target angle dynamically settable. | 
| -const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; | 
| - | 
| // The minimum separation in radians between the target direction and an | 
| // interferer scenario. | 
| const float kMinAwayRadians = 0.2f; | 
| @@ -50,7 +43,7 @@ const float kAwaySlope = 0.008f; | 
| // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) | 
| const float kBalance = 0.95f; | 
| -const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; | 
| +const float kHalfBeamWidthRadians = DegreesToRadians(20.f); | 
| // Alpha coefficients for mask smoothing. | 
| const float kMaskTimeSmoothAlpha = 0.2f; | 
| @@ -185,16 +178,50 @@ std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) { | 
| return array_geometry; | 
| } | 
| +Point PairDirection(const Point& a, const Point& b) { | 
| + return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()}; | 
| +} | 
| + | 
| +float DotProduct(const Point& a, const Point& b) { | 
| + return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); | 
| +} | 
| + | 
| +bool IsGeometryLinear(std::vector<Point> array_geometry) { | 
| 
 
Andrew MacDonald
2015/10/21 03:27:43
DCHECK_GT(array_geometry.size(), 1);
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | 
| + const float kMinDotProduct = 0.9999f; | 
| + const float kMinNorm = 1e-6f; | 
| + const Point first_pair_direction = | 
| + PairDirection(array_geometry[0], array_geometry[1]); | 
| + float norm_a = std::max( | 
| + kMinNorm, DotProduct(first_pair_direction, first_pair_direction)); | 
| + for (size_t i = 2u; i < array_geometry.size(); ++i) { | 
| + const Point pair_direction = | 
| + PairDirection(array_geometry[i - 1], array_geometry[i]); | 
| + float norm_b = | 
| 
 
Andrew MacDonald
2015/10/21 03:27:43
const
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | 
| + std::max(kMinNorm, DotProduct(pair_direction, pair_direction)); | 
| + if (std::abs(DotProduct(first_pair_direction, pair_direction) / | 
| + std::sqrt(norm_a * norm_b)) > kMinDotProduct) { | 
| + return false; | 
| + } | 
| + } | 
| + return true; | 
| +} | 
| + | 
| +Point AzimuthToPoint(float azimuth) { | 
| + return Point(std::cos(azimuth), std::sin(azimuth), 0.f); | 
| +} | 
| + | 
| } // namespace | 
| // static | 
| const size_t NonlinearBeamformer::kNumFreqBins; | 
| NonlinearBeamformer::NonlinearBeamformer( | 
| - const std::vector<Point>& array_geometry) | 
| + const std::vector<Point>& array_geometry, | 
| + SphericalPointf target_direction) | 
| : num_input_channels_(array_geometry.size()), | 
| array_geometry_(GetCenteredArray(array_geometry)), | 
| - min_mic_spacing_(GetMinimumSpacing(array_geometry)) { | 
| + min_mic_spacing_(GetMinimumSpacing(array_geometry)), | 
| + target_angle_radians_(target_direction.azimuth()) { | 
| WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); | 
| } | 
| @@ -202,7 +229,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 
| chunk_length_ = | 
| static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); | 
| sample_rate_hz_ = sample_rate_hz; | 
| - InitFrequencyCorrectionRanges(); | 
| high_pass_postfilter_mask_ = 1.f; | 
| is_target_present_ = false; | 
| @@ -223,52 +249,45 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | 
| wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; | 
| } | 
| - // Initialize all nonadaptive values before looping through the frames. | 
| - InitInterfAngles(); | 
| - InitDelaySumMasks(); | 
| - InitTargetCovMats(); | 
| - InitInterfCovMats(); | 
| + InitLowFrequencyCorrectionRanges(); | 
| + InitDiffuseCovMats(); | 
| + AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); | 
| +} | 
| - for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| - rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 
| - rpsiws_[i].clear(); | 
| - for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 
| - rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | 
| - } | 
| - } | 
| +// These bin indexes determine the regions over which a mean is taken. This is | 
| +// applied as a constant value over the adjacent end "frequency correction" | 
| +// regions. | 
| +// | 
| +// low_mean_start_bin_ high_mean_start_bin_ | 
| +// v v constant | 
| +// |----------------|--------|----------------|-------|----------------| | 
| +// constant ^ ^ | 
| +// low_mean_end_bin_ high_mean_end_bin_ | 
| +// | 
| +void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { | 
| + low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | 
| + low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | 
| + | 
| + RTC_DCHECK_GT(low_mean_start_bin_, 0U); | 
| + RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | 
| } | 
| -void NonlinearBeamformer::InitFrequencyCorrectionRanges() { | 
| +void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { | 
| const float kAliasingFreqHz = | 
| kSpeedOfSoundMeterSeconds / | 
| - (min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); | 
| + (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); | 
| const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, | 
| sample_rate_hz_ / 2.f); | 
| const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, | 
| sample_rate_hz_ / 2.f); | 
| - | 
| - low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); | 
| - low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); | 
| high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); | 
| high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); | 
| - // These bin indexes determine the regions over which a mean is taken. This | 
| - // is applied as a constant value over the adjacent end "frequency correction" | 
| - // regions. | 
| - // | 
| - // low_mean_start_bin_ high_mean_start_bin_ | 
| - // v v constant | 
| - // |----------------|--------|----------------|-------|----------------| | 
| - // constant ^ ^ | 
| - // low_mean_end_bin_ high_mean_end_bin_ | 
| - // | 
| - RTC_DCHECK_GT(low_mean_start_bin_, 0U); | 
| - RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); | 
| + | 
| RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); | 
| RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); | 
| RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); | 
| } | 
| - | 
| void NonlinearBeamformer::InitInterfAngles() { | 
| 
 
Andrew MacDonald
2015/10/21 03:27:43
This looks good, but it's probably complicated eno
 
aluebs-webrtc
2015/10/27 18:08:15
Good point! :)
And the great part about writing un
 
 | 
| const float kAwayRadians = | 
| std::min(static_cast<float>(M_PI), | 
| @@ -276,22 +295,43 @@ void NonlinearBeamformer::InitInterfAngles() { | 
| min_mic_spacing_)); | 
| interf_angles_radians_.clear(); | 
| - // TODO(aluebs): When the target angle is settable, make sure the interferer | 
| - // scenarios aren't reflected over the target one for linear geometries. | 
| - interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); | 
| - interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); | 
| + if (IsGeometryLinear(array_geometry_)) { | 
| + const Point array_direction = | 
| + PairDirection(array_geometry_[0], array_geometry_[1]); | 
| + const Point array_normal(array_direction.y(), -array_direction.x(), 0.f); | 
| + const Point target_direction = AzimuthToPoint(target_angle_radians_); | 
| + const Point clockwise_interf_direction = | 
| + AzimuthToPoint(target_angle_radians_ - kAwayRadians); | 
| + if (DotProduct(array_normal, target_direction) * | 
| 
 
Andrew MacDonald
2015/10/21 03:27:43
Add some comments to each of these conditions, bec
 
aluebs-webrtc
2015/10/27 18:08:15
Done.
 
 | 
| + DotProduct(array_normal, clockwise_interf_direction) >= | 
| + 0.f) { | 
| + interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | 
| + } else { | 
| + interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians + | 
| + M_PI); | 
| + } | 
| + const Point counterclock_interf_direction = | 
| + AzimuthToPoint(target_angle_radians_ + kAwayRadians); | 
| + if (DotProduct(array_normal, target_direction) * | 
| + DotProduct(array_normal, counterclock_interf_direction) >= | 
| + 0.f) { | 
| + interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | 
| + } else { | 
| + interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians - | 
| + M_PI); | 
| + } | 
| + } else { | 
| + interf_angles_radians_.push_back(target_angle_radians_ - kAwayRadians); | 
| + interf_angles_radians_.push_back(target_angle_radians_ + kAwayRadians); | 
| + } | 
| } | 
| void NonlinearBeamformer::InitDelaySumMasks() { | 
| for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { | 
| delay_sum_masks_[f_ix].Resize(1, num_input_channels_); | 
| - CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, | 
| - kFftSize, | 
| - sample_rate_hz_, | 
| - kSpeedOfSoundMeterSeconds, | 
| - array_geometry_, | 
| - kTargetAngleRadians, | 
| - &delay_sum_masks_[f_ix]); | 
| + CovarianceMatrixGenerator::PhaseAlignmentMasks( | 
| + f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, | 
| + array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); | 
| complex_f norm_factor = sqrt( | 
| ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); | 
| @@ -309,15 +349,19 @@ void NonlinearBeamformer::InitTargetCovMats() { | 
| } | 
| } | 
| +void NonlinearBeamformer::InitDiffuseCovMats() { | 
| + for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| + uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); | 
| + CovarianceMatrixGenerator::UniformCovarianceMatrix( | 
| + wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); | 
| + complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; | 
| + uniform_cov_mat_[i].Scale(1.f / normalization_factor); | 
| + uniform_cov_mat_[i].Scale(1 - kBalance); | 
| + } | 
| +} | 
| + | 
| void NonlinearBeamformer::InitInterfCovMats() { | 
| for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| - ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); | 
| - CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], | 
| - array_geometry_, | 
| - &uniform_cov_mat); | 
| - complex_f normalization_factor = uniform_cov_mat.elements()[0][0]; | 
| - uniform_cov_mat.Scale(1.f / normalization_factor); | 
| - uniform_cov_mat.Scale(1 - kBalance); | 
| interf_cov_mats_[i].clear(); | 
| for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 
| interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, | 
| @@ -333,11 +377,21 @@ void NonlinearBeamformer::InitInterfCovMats() { | 
| array_geometry_, | 
| &angled_cov_mat); | 
| // Normalize matrices before averaging them. | 
| - normalization_factor = angled_cov_mat.elements()[0][0]; | 
| + complex_f normalization_factor = angled_cov_mat.elements()[0][0]; | 
| angled_cov_mat.Scale(1.f / normalization_factor); | 
| // Weighted average of matrices. | 
| angled_cov_mat.Scale(kBalance); | 
| - interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); | 
| + interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); | 
| + } | 
| + } | 
| +} | 
| + | 
| +void NonlinearBeamformer::NormalizeCovMats() { | 
| + for (size_t i = 0; i < kNumFreqBins; ++i) { | 
| + rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); | 
| + rpsiws_[i].clear(); | 
| + for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { | 
| + rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); | 
| } | 
| } | 
| } | 
| @@ -372,10 +426,20 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, | 
| } | 
| } | 
| +void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { | 
| + target_angle_radians_ = target_direction.azimuth(); | 
| + InitHighFrequencyCorrectionRanges(); | 
| + InitInterfAngles(); | 
| + InitDelaySumMasks(); | 
| + InitTargetCovMats(); | 
| + InitInterfCovMats(); | 
| + NormalizeCovMats(); | 
| +} | 
| + | 
| bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { | 
| // If more than half-beamwidth degrees away from the beam's center, | 
| // you are out of the beam. | 
| - return fabs(spherical_point.azimuth() - kTargetAngleRadians) < | 
| + return fabs(spherical_point.azimuth() - target_angle_radians_) < | 
| kHalfBeamWidthRadians; | 
| } |