Chromium Code Reviews
| Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| index f5bdd6a3c2d0308bea77074301f6d8f688c89ead..7b37604559eaba545df07ee3a227cf4e24e8890e 100644 |
| --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| @@ -122,18 +122,6 @@ size_t Round(float x) { |
| return static_cast<size_t>(std::floor(x + 0.5f)); |
| } |
| -// Calculates the sum of absolute values of a complex matrix. |
| -float SumAbs(const ComplexMatrix<float>& mat) { |
| - float sum_abs = 0.f; |
| - const complex<float>* const* mat_els = mat.elements(); |
| - for (size_t i = 0; i < mat.num_rows(); ++i) { |
| - for (size_t j = 0; j < mat.num_columns(); ++j) { |
| - sum_abs += std::abs(mat_els[i][j]); |
| - } |
| - } |
| - return sum_abs; |
| -} |
| - |
| // Calculates the sum of squares of a complex matrix. |
| float SumSquares(const ComplexMatrix<float>& mat) { |
| float sum_squares = 0.f; |
| @@ -183,6 +171,31 @@ const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); |
| // static |
| const size_t NonlinearBeamformer::kNumFreqBins; |
| +PostFilterTransform::PostFilterTransform(size_t chunk_length, |
| + float* window, |
| + size_t fft_size) |
| + : transform_(1u, 1u, chunk_length, window, fft_size, fft_size / 2, this), |
| + num_freq_bins_(fft_size / 2 + 1) {} |
| + |
| +void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) { |
| + final_mask_ = final_mask; |
| + transform_.ProcessChunk(data, data); |
| +} |
| + |
| +void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input, |
| + size_t num_input_channels, |
| + size_t num_freq_bins, |
| + size_t num_output_channels, |
| + complex<float>* const* output) { |
| + RTC_CHECK_EQ(num_freq_bins_, num_freq_bins); |
| + RTC_CHECK_EQ(1u, num_input_channels); |
|
peah-webrtc
2016/06/08 12:04:55
As it is now, the beamformer will crash the application [...]
aluebs-webrtc
2016/06/09 02:11:46
Constraint removed. Changed to DCHECK.
|
| + RTC_CHECK_EQ(1u, num_output_channels); |
| + |
| + for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) { |
| + output[0][f_ix] = kCompensationGain * final_mask_[f_ix] * input[0][f_ix]; |
| + } |
| +} |
| + |
| NonlinearBeamformer::NonlinearBeamformer( |
| const std::vector<Point>& array_geometry, |
| SphericalPointf target_direction) |
| @@ -208,18 +221,22 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
| interference_blocks_count_ = hold_target_blocks_; |
| - lapped_transform_.reset(new LappedTransform(num_input_channels_, |
| - 1, |
| - chunk_length_, |
| - window_, |
| - kFftSize, |
| - kFftSize / 2, |
| - this)); |
| + process_transform_.reset(new LappedTransform(num_input_channels_, |
| + 0u, |
| + chunk_length_, |
| + window_, |
| + kFftSize, |
| + kFftSize / 2, |
| + this)); |
| + postfilter_transform_.reset(new PostFilterTransform( |
| + chunk_length_, window_, kFftSize)); |
| + dummy_out_.reset(new ChannelBuffer<float>(chunk_length_, 0u)); |
|
peah-webrtc
2016/06/08 12:04:55
Why do we need to allocate a dummy output of a full [...]
aluebs-webrtc
2016/06/09 02:11:46
It doesn't allocate anything, since if there are 0 [...]
|
| + const float wave_number_step = |
| + (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds); |
| for (size_t i = 0; i < kNumFreqBins; ++i) { |
| time_smooth_mask_[i] = 1.f; |
| final_mask_[i] = 1.f; |
| - float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; |
| - wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; |
| + wave_numbers_[i] = i * wave_number_step; |
| } |
| InitLowFrequencyCorrectionRanges(); |
| @@ -306,9 +323,6 @@ void NonlinearBeamformer::InitDelaySumMasks() { |
| complex_f norm_factor = sqrt( |
| ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); |
| delay_sum_masks_[f_ix].Scale(1.f / norm_factor); |
| - normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); |
| - normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( |
| - normalized_delay_sum_masks_[f_ix])); |
| } |
| } |
| @@ -366,26 +380,29 @@ void NonlinearBeamformer::NormalizeCovMats() { |
| } |
| } |
| -void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
| - ChannelBuffer<float>* output) { |
| - RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
| - RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
| +void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) { |
| + RTC_DCHECK_EQ(data.num_channels(), num_input_channels_); |
| + RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_); |
| + |
| + old_high_pass_mask_ = high_pass_postfilter_mask_; |
| + process_transform_->ProcessChunk(data.channels(0), dummy_out_->channels(0)); |
|
peah-webrtc
2016/06/08 12:04:55
Do you really need to pass a dummy output buffer here [...]
aluebs-webrtc
2016/06/09 02:11:46
Good point, since I modified lapped_transform to a [...]
|
| +} |
| + |
| +void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) { |
| + RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_); |
| - float old_high_pass_mask = high_pass_postfilter_mask_; |
| - lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| - // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
| - // discontinuities. |
| + postfilter_transform_->ProcessChunk(data->channels(0), final_mask_); |
| + |
| + // Ramp up/down for smoothing is needed in order to avoid discontinuities in |
| + // the transitions between 10 ms frames. |
| const float ramp_increment = |
| - (high_pass_postfilter_mask_ - old_high_pass_mask) / |
| - input.num_frames_per_band(); |
| - // Apply the smoothed high-pass mask to the first channel of each band. |
| - // This can be done because the effect of the linear beamformer is negligible |
| - // compared to the post-filter. |
| - for (size_t i = 1; i < input.num_bands(); ++i) { |
| - float smoothed_mask = old_high_pass_mask; |
| - for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
| + (high_pass_postfilter_mask_ - old_high_pass_mask_) / |
| + data->num_frames_per_band(); |
| + for (size_t i = 1; i < data->num_bands(); ++i) { |
| + float smoothed_mask = old_high_pass_mask_; |
| + for (size_t j = 0; j < data->num_frames_per_band(); ++j) { |
| smoothed_mask += ramp_increment; |
| - output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; |
| + data->channels(i)[0][j] *= smoothed_mask; |
|
peah-webrtc
2016/06/08 12:04:55
This code assumes single-channel processing, right [...]
aluebs-webrtc
2016/06/09 02:11:46
Adjusted the postfilter to support multichannel data [...]
|
| } |
| } |
| } |
| @@ -414,7 +431,7 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
| complex_f* const* output) { |
| RTC_CHECK_EQ(kNumFreqBins, num_freq_bins); |
| RTC_CHECK_EQ(num_input_channels_, num_input_channels); |
| - RTC_CHECK_EQ(1u, num_output_channels); |
| + RTC_CHECK_EQ(0u, num_output_channels); |
| // Calculating the post-filter masks. Note that we need two for each |
| // frequency bin to account for the positive and negative interferer |
| @@ -456,7 +473,6 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
| ApplyLowFrequencyCorrection(); |
| ApplyHighFrequencyCorrection(); |
| ApplyMaskFrequencySmoothing(); |
| - ApplyMasks(input, output); |
| } |
| float NonlinearBeamformer::CalculatePostfilterMask( |
| @@ -484,22 +500,6 @@ float NonlinearBeamformer::CalculatePostfilterMask( |
| return numerator / denominator; |
| } |
| -void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
| - complex_f* const* output) { |
| - complex_f* output_channel = output[0]; |
| - for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| - output_channel[f_ix] = complex_f(0.f, 0.f); |
| - |
| - const complex_f* delay_sum_mask_els = |
| - normalized_delay_sum_masks_[f_ix].elements()[0]; |
| - for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
| - output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
| - } |
| - |
| - output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix]; |
| - } |
| -} |
| - |
| // Smooth new_mask_ into time_smooth_mask_. |
| void NonlinearBeamformer::ApplyMaskTimeSmoothing() { |
| for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { |