webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc - Issue 1378973003: Implement new version of the NonlinearBeamformer

Unified Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1378973003: Implement new version of the NonlinearBeamformer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Widen beam and compensate attenuation Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc ('K') | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

index da7ad0da59c1d663175e6ed878ab1a8aea06407d..6a30cbfbb3272a014f063ca07a7eb4d0c9949357 100644

--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

@@ -27,34 +27,23 @@ namespace {

// Alpha for the Kaiser Bessel Derived window.

const float kKbdAlpha = 1.5f;

-// The minimum value a post-processing mask can take.

-const float kMaskMinimum = 0.01f;

const float kSpeedOfSoundMeterSeconds = 343;

// For both target and interference angles, PI / 2 is perpendicular to the

// microphone array, facing forwards. The positive direction goes

// counterclockwise.

// The angle at which we amplify sound.

+// TODO(aluebs): Make the target angle dynamically settable.

const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

-// The angle at which we suppress sound. Suppression is symmetric around PI / 2

-// radians, so sound is suppressed at both +|kInterfAngleRadians| and

-// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should

-// suppress sound coming from close angles as well.

-const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;

// When calculating the interference covariance matrix, this is the weight for

// the weighted average between the uniform covariance matrix and the angled

// covariance matrix.

// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)

-const float kBalance = 0.4f;

+const float kBalance = 0.95f;

const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;

-// TODO(claguna): need comment here.

-const float kBeamwidthConstant = 0.00002f;

// Alpha coefficients for mask smoothing.

const float kMaskTimeSmoothAlpha = 0.2f;

const float kMaskFrequencySmoothAlpha = 0.6f;

@@ -64,17 +53,34 @@ const float kMaskFrequencySmoothAlpha = 0.6f;

const int kLowMeanStartHz = 200;

const int kLowMeanEndHz = 400;

+// TODO(aluebs): Make the high frequency correction range depend on the target

+// angle.

const int kHighMeanStartHz = 3000;

const int kHighMeanEndHz = 5000;

+// Range limiter for subtractive terms in the numerator and denominator of the

+// postfilter expression. It handles the scenario mismatch between the true and

+// model sources (target and interference).

+const float kCutOffConstant = 0.9999;

// Quantile of mask values which is used to estimate target presence.

const float kMaskQuantile = 0.7f;

// Mask threshold over which the data is considered signal and not interference.

-const float kMaskTargetThreshold = 0.3f;

+// It has to be updated every time the postfilter calculation is changed

+// significantly.

+// TODO(aluebs): Write a tool to tune the target threshold automatically based

+// on files annotated with target and interference ground truth.

+const float kMaskTargetThreshold = 0.01f;

// Time in seconds after which the data is considered interference if the mask

// does not pass |kMaskTargetThreshold|.

const float kHoldTargetSeconds = 0.25f;

+// To compensate for the attenuation this algorithm introduces to the target

+// signal. It was estimated empirically from a low-noise, low-reverberation

+// recording from broadside, since if both channels are exactly the same no

Andrew MacDonald 2015/10/13 21:55:16 Perhaps drop the "since if both channels..." part,

aluebs-webrtc 2015/10/16 16:41:14 I dropped it.

+// attenuation is introduced.

+const float kCompensationGain = 2.f;

// used; to accomplish this, we compute both multiplications in the same loop.

// The returned norm is clamped to be non-negative.

@@ -218,7 +224,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {

hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;

interference_blocks_count_ = hold_target_blocks_;

lapped_transform_.reset(new LappedTransform(num_input_channels_,

chunk_length_,

@@ -231,24 +236,34 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {

final_mask_[i] = 1.f;

float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;

wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;

- mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *

- kBeamwidthConstant * wave_numbers_[i] *

- wave_numbers_[i];

}

// Initialize all nonadaptive values before looping through the frames.

+ InitInterfAngles();

InitDelaySumMasks();

InitTargetCovMats();

InitInterfCovMats();

for (size_t i = 0; i < kNumFreqBins; ++i) {

rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);

- rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);

- reflected_rpsiws_[i] =

- Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);

+ rpsiws_[i].clear();

+ for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

+ rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));

+ }

}

+void NonlinearBeamformer::InitInterfAngles() {

+ // TODO(aluebs): Make kAway dependent on the mic spacing.

+ const float kAway = 0.5;

Andrew MacDonald 2015/10/13 21:55:16 Does this reduce interferer suppression as well, o

aluebs-webrtc 2015/10/16 16:41:14 The suppression gradual moving away from the cente

+ interf_angles_radians_.clear();

+ // TODO(aluebs): When the target angle is settable, make sure the interferer

+ // scenarios aren't reflected over the target one for linear geometries.

+ interf_angles_radians_.push_back(kTargetAngleRadians - kAway);

+ interf_angles_radians_.push_back(kTargetAngleRadians + kAway);

void NonlinearBeamformer::InitDelaySumMasks() {

for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

delay_sum_masks_[f_ix].Resize(1, num_input_channels_);

@@ -273,40 +288,39 @@ void NonlinearBeamformer::InitTargetCovMats() {

for (size_t i = 0; i < kNumFreqBins; ++i) {

target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);

- complex_f normalization_factor = target_cov_mats_[i].Trace();

- target_cov_mats_[i].Scale(1.f / normalization_factor);

}

void NonlinearBeamformer::InitInterfCovMats() {

for (size_t i = 0; i < kNumFreqBins; ++i) {

- interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);

- ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],

array_geometry_,

&uniform_cov_mat);

- CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,

- kInterfAngleRadians,

- i,

- kFftSize,

- kNumFreqBins,

- sample_rate_hz_,

- array_geometry_,

- &angled_cov_mat);

- // Normalize matrices before averaging them.

- complex_f normalization_factor = uniform_cov_mat.Trace();

+ complex_f normalization_factor = uniform_cov_mat.elements()[0][0];

uniform_cov_mat.Scale(1.f / normalization_factor);

- normalization_factor = angled_cov_mat.Trace();

- angled_cov_mat.Scale(1.f / normalization_factor);

- // Average matrices.

uniform_cov_mat.Scale(1 - kBalance);

- angled_cov_mat.Scale(kBalance);

- interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);

- reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);

+ interf_cov_mats_[i].clear();

+ for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

+ interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,

+ num_input_channels_));

+ ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

+ CovarianceMatrixGenerator::AngledCovarianceMatrix(

+ kSpeedOfSoundMeterSeconds,

+ interf_angles_radians_[j],

+ i,

+ kFftSize,

+ kNumFreqBins,

+ sample_rate_hz_,

+ array_geometry_,

+ &angled_cov_mat);

+ // Normalize matrices before averaging them.

+ normalization_factor = angled_cov_mat.elements()[0][0];

+ angled_cov_mat.Scale(1.f / normalization_factor);

+ // Weighted average of matrices.

+ angled_cov_mat.Scale(kBalance);

+ interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat);

+ }

}

@@ -376,17 +390,19 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,

rmw *= rmw;

float rmw_r = rmw.real();

- new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],

- rpsiws_[i],

+ new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],

+ rpsiws_[i][0],

ratio_rxiw_rxim,

- rmw_r,

- mask_thresholds_[i]);

- new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],

- reflected_rpsiws_[i],

- ratio_rxiw_rxim,

- rmw_r,

- mask_thresholds_[i]);

+ rmw_r);

+ for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {

+ float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],

+ rpsiws_[i][j],

+ ratio_rxiw_rxim,

+ rmw_r);

+ if (tmp_mask < new_mask_[i]) {

+ new_mask_[i] = tmp_mask;

+ }

}

ApplyMaskTimeSmoothing();

@@ -401,24 +417,16 @@ float NonlinearBeamformer::CalculatePostfilterMask(

const ComplexMatrixF& interf_cov_mat,

float rpsiw,

float ratio_rxiw_rxim,

- float rmw_r,

- float mask_threshold) {

+ float rmw_r) {

float rpsim = Norm(interf_cov_mat, eig_m_);

- // Find lambda.

float ratio = 0.f;

if (rpsim > 0.f) {

ratio = rpsiw / rpsim;

}

- float numerator = rmw_r - ratio;

- float denominator = ratio_rxiw_rxim - ratio;

- float mask = 1.f;

- if (denominator > mask_threshold) {

- float lambda = numerator / denominator;

- mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);

- }

- return mask;

+ return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /

+ (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));

}

void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,

@@ -433,7 +441,7 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,

output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];

}

- output_channel[f_ix] *= final_mask_[f_ix];

+ output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];

}