Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(492)

Unified Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1378973003: Implement new version of the NonlinearBeamformer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fix float constant Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
index da7ad0da59c1d663175e6ed878ab1a8aea06407d..40e738c6d1fc6a4735763c178dc3259124435d17 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -27,34 +27,23 @@ namespace {
// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;
-// The minimum value a post-processing mask can take.
-const float kMaskMinimum = 0.01f;
-
const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
+// TODO(aluebs): Make the target angle dynamically settable.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
-// The angle at which we suppress sound. Suppression is symmetric around PI / 2
-// radians, so sound is suppressed at both +|kInterfAngleRadians| and
-// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
-// suppress sound coming from close angles as well.
-const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
-
// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
-const float kBalance = 0.4f;
+const float kBalance = 0.95f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
-// TODO(claguna): need comment here.
-const float kBeamwidthConstant = 0.00002f;
-
// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f;
@@ -64,17 +53,33 @@ const float kMaskFrequencySmoothAlpha = 0.6f;
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;
+// TODO(aluebs): Make the high frequency correction range depend on the target
+// angle.
const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;
+// Range limiter for subtractive terms in the nominator and denominator of the
+// postfilter expression. It handles the scenario mismatch between the true and
+// model sources (target and interference).
+const float kCutOffConstant = 0.9999f;
+
// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
-const float kMaskTargetThreshold = 0.3f;
+// It has to be updated every time the postfilter calculation is changed
+// significantly.
+// TODO(aluebs): Write a tool to tune the target threshold automatically based
+// on files annotated with target and interference ground truth.
+const float kMaskTargetThreshold = 0.01f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;
+// To compensate for the attenuation this algorithm introduces to the target
+// signal. It was estimated empirically from a low-noise low-reverberation
+// recording from broadside.
+const float kCompensationGain = 2.f;
+
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
@@ -218,7 +223,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
-
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
@@ -231,24 +235,34 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
final_mask_[i] = 1.f;
float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
- mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
- kBeamwidthConstant * wave_numbers_[i] *
- wave_numbers_[i];
}
// Initialize all nonadaptive values before looping through the frames.
+ InitInterfAngles();
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
- rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
- reflected_rpsiws_[i] =
- Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
+ rpsiws_[i].clear();
+ for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
+ rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
+ }
}
}
+void NonlinearBeamformer::InitInterfAngles() {
+ // TODO(aluebs): Make kAwayRadians dependent on the mic spacing.
+ const float kAwayRadians = 0.5;
+
+ interf_angles_radians_.clear();
+ // TODO(aluebs): When the target angle is settable, make sure the interferer
+ // scenarios aren't reflected over the target one for linear geometries.
+ interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians);
+ interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians);
+}
+
void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
@@ -273,40 +287,39 @@ void NonlinearBeamformer::InitTargetCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
- complex_f normalization_factor = target_cov_mats_[i].Trace();
- target_cov_mats_[i].Scale(1.f / normalization_factor);
}
}
void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
- interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
- ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
-
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
-
- CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
- kInterfAngleRadians,
- i,
- kFftSize,
- kNumFreqBins,
- sample_rate_hz_,
- array_geometry_,
- &angled_cov_mat);
- // Normalize matrices before averaging them.
- complex_f normalization_factor = uniform_cov_mat.Trace();
+ complex_f normalization_factor = uniform_cov_mat.elements()[0][0];
uniform_cov_mat.Scale(1.f / normalization_factor);
- normalization_factor = angled_cov_mat.Trace();
- angled_cov_mat.Scale(1.f / normalization_factor);
-
- // Average matrices.
uniform_cov_mat.Scale(1 - kBalance);
- angled_cov_mat.Scale(kBalance);
- interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
- reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
+ interf_cov_mats_[i].clear();
+ for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
+ interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
+ num_input_channels_));
+ ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
+ CovarianceMatrixGenerator::AngledCovarianceMatrix(
+ kSpeedOfSoundMeterSeconds,
+ interf_angles_radians_[j],
+ i,
+ kFftSize,
+ kNumFreqBins,
+ sample_rate_hz_,
+ array_geometry_,
+ &angled_cov_mat);
+ // Normalize matrices before averaging them.
+ normalization_factor = angled_cov_mat.elements()[0][0];
+ angled_cov_mat.Scale(1.f / normalization_factor);
+ // Weighted average of matrices.
+ angled_cov_mat.Scale(kBalance);
+ interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat);
+ }
}
}
@@ -376,17 +389,19 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
rmw *= rmw;
float rmw_r = rmw.real();
- new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
- rpsiws_[i],
+ new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
+ rpsiws_[i][0],
ratio_rxiw_rxim,
- rmw_r,
- mask_thresholds_[i]);
-
- new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
- reflected_rpsiws_[i],
- ratio_rxiw_rxim,
- rmw_r,
- mask_thresholds_[i]);
+ rmw_r);
+ for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
+ float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
+ rpsiws_[i][j],
+ ratio_rxiw_rxim,
+ rmw_r);
+ if (tmp_mask < new_mask_[i]) {
+ new_mask_[i] = tmp_mask;
+ }
+ }
}
ApplyMaskTimeSmoothing();
@@ -401,24 +416,16 @@ float NonlinearBeamformer::CalculatePostfilterMask(
const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
- float rmw_r,
- float mask_threshold) {
+ float rmw_r) {
float rpsim = Norm(interf_cov_mat, eig_m_);
- // Find lambda.
float ratio = 0.f;
if (rpsim > 0.f) {
ratio = rpsiw / rpsim;
}
- float numerator = rmw_r - ratio;
- float denominator = ratio_rxiw_rxim - ratio;
- float mask = 1.f;
- if (denominator > mask_threshold) {
- float lambda = numerator / denominator;
- mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
- }
- return mask;
+ return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /
+ (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
}
void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
@@ -433,7 +440,7 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
- output_channel[f_ix] *= final_mask_[f_ix];
+ output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
}
}
« no previous file with comments | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698