Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
index f5bdd6a3c2d0308bea77074301f6d8f688c89ead..8a68c7ab315bab25b2eec2c0314590dbdaeeb80a 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -122,18 +122,6 @@ size_t Round(float x) {
   return static_cast<size_t>(std::floor(x + 0.5f));
 }
 
-// Calculates the sum of absolute values of a complex matrix.
-float SumAbs(const ComplexMatrix<float>& mat) {
-  float sum_abs = 0.f;
-  const complex<float>* const* mat_els = mat.elements();
-  for (size_t i = 0; i < mat.num_rows(); ++i) {
-    for (size_t j = 0; j < mat.num_columns(); ++j) {
-      sum_abs += std::abs(mat_els[i][j]);
-    }
-  }
-  return sum_abs;
-}
-
 // Calculates the sum of squares of a complex matrix.
 float SumSquares(const ComplexMatrix<float>& mat) {
   float sum_squares = 0.f;
@@ -183,6 +171,31 @@ const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
 // static
 const size_t NonlinearBeamformer::kNumFreqBins;
 
+PostFilterTransform::PostFilterTransform(size_t chunk_length,
+                                         float* window,
+                                         size_t fft_size)
+    : transform_(1u, 1u, chunk_length, window, fft_size, fft_size / 2, this),
+      num_freq_bins_(fft_size / 2 + 1) {}
+
+void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
+  final_mask_ = final_mask;
+  transform_.ProcessChunk(data, data);
+}
+
+void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
+                                            size_t num_input_channels,
+                                            size_t num_freq_bins,
+                                            size_t num_output_channels,
+                                            complex<float>* const* output) {
+  RTC_CHECK_EQ(num_freq_bins_, num_freq_bins);
+  RTC_CHECK_EQ(1u, num_input_channels);
+  RTC_CHECK_EQ(1u, num_output_channels);
+
+  for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
+    output[0][f_ix] = kCompensationGain * final_mask_[f_ix] * input[0][f_ix];
+  }
+}
+
 NonlinearBeamformer::NonlinearBeamformer(
     const std::vector<Point>& array_geometry,
     SphericalPointf target_direction)
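The hunk above introduces PostFilterTransform, a single-channel LappedTransform callback that applies the final post-filter mask per frequency bin. The following standalone sketch, which is not part of the patch and uses illustrative names, spells out the per-bin operation that the ProcessAudioBlock() callback performs for the 1-input/1-output-channel case it checks for:

#include <complex>
#include <cstddef>

// Multiplies every frequency bin of one FFT block by its real-valued mask and
// a fixed compensation gain, mirroring what the callback above computes.
void ApplyPostFilterMask(const std::complex<float>* input_block,
                         const float* mask,
                         float compensation_gain,
                         std::size_t num_freq_bins,
                         std::complex<float>* output_block) {
  for (std::size_t f_ix = 0; f_ix < num_freq_bins; ++f_ix) {
    output_block[f_ix] = compensation_gain * mask[f_ix] * input_block[f_ix];
  }
}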
@@ -208,18 +221,21 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
   interference_blocks_count_ = hold_target_blocks_;
 
-  lapped_transform_.reset(new LappedTransform(num_input_channels_,
-                                              1,
-                                              chunk_length_,
-                                              window_,
-                                              kFftSize,
-                                              kFftSize / 2,
-                                              this));
+  process_transform_.reset(new LappedTransform(num_input_channels_,
+                                               1u,
+                                               chunk_length_,
+                                               window_,
+                                               kFftSize,
+                                               kFftSize / 2,
+                                               this));
+  postfilter_transform_.reset(new PostFilterTransform(
+      chunk_length_, window_, kFftSize));
+  const float wave_number_step =
+      (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
   for (size_t i = 0; i < kNumFreqBins; ++i) {
     time_smooth_mask_[i] = 1.f;
     final_mask_[i] = 1.f;
-    float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
-    wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
+    wave_numbers_[i] = i * wave_number_step;
   }
 
   InitLowFrequencyCorrectionRanges();
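The new wave_number_step form is algebraically identical to the per-bin formula it replaces: k_i = 2*pi*f_i / c with f_i = i * fs / kFftSize reduces to k_i = i * (2*pi*fs) / (kFftSize * c). A small self-contained check, not part of the patch, with constants chosen for illustration (a 256-point FFT at 16 kHz, and an assumed speed of sound):

#include <cassert>
#include <cmath>

int main() {
  const float kPi = 3.14159265f;
  const float kSpeedOfSound = 343.f;  // Assumed value for this sketch.
  const int kFftSize = 256;
  const float sample_rate_hz = 16000.f;
  const float wave_number_step =
      (2.f * kPi * sample_rate_hz) / (kFftSize * kSpeedOfSound);
  for (int i = 0; i < kFftSize / 2 + 1; ++i) {
    // Old form: convert the bin index to Hz, then to a wave number.
    const float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz;
    const float old_form = 2.f * kPi * freq_hz / kSpeedOfSound;
    // New form: a fixed step per bin.
    const float new_form = i * wave_number_step;
    assert(std::fabs(old_form - new_form) < 1e-3f);
  }
  return 0;
}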
@@ -306,9 +322,6 @@ void NonlinearBeamformer::InitDelaySumMasks() {
     complex_f norm_factor = sqrt(
         ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
     delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
-    normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
-    normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
-        normalized_delay_sum_masks_[f_ix]));
   }
 }
 
@@ -371,21 +384,33 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
   RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
   RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
 
-  float old_high_pass_mask = high_pass_postfilter_mask_;
-  lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
-  // Ramp up/down for smoothing. 1 mask per 10ms results in audible
-  // discontinuities.
-  const float ramp_increment =
-      (high_pass_postfilter_mask_ - old_high_pass_mask) /
-      input.num_frames_per_band();
-  // Apply the smoothed high-pass mask to the first channel of each band.
+  old_high_pass_mask_ = high_pass_postfilter_mask_;
+  process_transform_->ProcessChunk(input.channels(0), output->channels(0));
+  // Copy over only the first channel of each band.
   // This can be done because the effect of the linear beamformer is negligible
   // compared to the post-filter.
-  for (size_t i = 1; i < input.num_bands(); ++i) {
-    float smoothed_mask = old_high_pass_mask;
-    for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
+  for (size_t i = 0; i < input.num_bands(); ++i) {
peah-webrtc 2016/05/26 08:48:53: If the output is removed from ProcessChunk, these
aluebs-webrtc 2016/05/28 03:00:00: See above on why I think the output should not be
+    memcpy(output->channels(i)[0],
+           input.channels(i)[0],
+           input.num_frames_per_band() * sizeof(output->channels(i)[0][0]));
+  }
+}
+
+void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
+  RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
+
+  postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
+
+  // Ramp up/down for smoothing is needed in order to avoid discontinuities in
+  // the transitions between 10 ms frames.
+  const float ramp_increment =
+      (high_pass_postfilter_mask_ - old_high_pass_mask_) /
+      data->num_frames_per_band();
+  for (size_t i = 1; i < data->num_bands(); ++i) {
+    float smoothed_mask = old_high_pass_mask_;
+    for (size_t j = 0; j < data->num_frames_per_band(); ++j) {
       smoothed_mask += ramp_increment;
-      output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
+      data->channels(i)[0][j] *= smoothed_mask;
     }
   }
 }
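For the higher bands the post-filter cannot be applied per frequency bin, so the patch ramps the wideband high_pass_postfilter_mask_ linearly across the 10 ms frame; jumping to the new gain once per chunk would be audible. The same smoothing as a standalone helper, shown only as an illustrative sketch and not part of the patch:

#include <cstddef>

// Linearly interpolates the gain from the previous chunk's mask value to the
// current one across a band of num_frames samples, scaling the band in place.
void RampMaskOverBand(float old_mask,
                      float new_mask,
                      float* band,
                      std::size_t num_frames) {
  const float ramp_increment = (new_mask - old_mask) / num_frames;
  float smoothed_mask = old_mask;
  for (std::size_t j = 0; j < num_frames; ++j) {
    smoothed_mask += ramp_increment;
    band[j] *= smoothed_mask;
  }
}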
@@ -456,7 +481,7 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
   ApplyLowFrequencyCorrection();
   ApplyHighFrequencyCorrection();
   ApplyMaskFrequencySmoothing();
-  ApplyMasks(input, output);
+  memcpy(output[0], input[0], kNumFreqBins * sizeof(input[0][0]));
peah-webrtc 2016/05/26 08:48:53: This memcopy is not necessary, why not instead di
aluebs-webrtc 2016/05/28 03:00:00: See above why I don't think we should drop the out
 }
 
 float NonlinearBeamformer::CalculatePostfilterMask(
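The thread above is about keeping the output argument of ProcessChunk() even though mask application has moved out of it. A rough sketch of the call order this change enables; this is hypothetical caller code, and it assumes the header (not shown in this diff) exposes PostFilter():

// ProcessChunk() now only estimates the time-frequency masks and passes the
// first input channel through; PostFilter() later applies final_mask_ in
// place, so other capture-side processing can run in between.
void RunBeamformerOnChunk(NonlinearBeamformer* beamformer,
                          const ChannelBuffer<float>& capture,
                          ChannelBuffer<float>* output) {
  beamformer->ProcessChunk(capture, output);
  // ... other processing on |output| could happen here ...
  beamformer->PostFilter(output);
}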
@@ -484,22 +509,6 @@ float NonlinearBeamformer::CalculatePostfilterMask(
   return numerator / denominator;
 }
 
-void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
-                                     complex_f* const* output) {
-  complex_f* output_channel = output[0];
-  for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
-    output_channel[f_ix] = complex_f(0.f, 0.f);
-
-    const complex_f* delay_sum_mask_els =
-        normalized_delay_sum_masks_[f_ix].elements()[0];
-    for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
-      output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
-    }
-
-    output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
-  }
-}
-
 // Smooth new_mask_ into time_smooth_mask_.
 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
   for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {