| Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| index f5bdd6a3c2d0308bea77074301f6d8f688c89ead..b5d5197edc0e6218c548368e49d27e3cddaabbe4 100644 |
| --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc |
| @@ -183,6 +183,30 @@ const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); |
| // static |
|
peah-webrtc
2016/05/22 21:06:48
Please correct this comment as well while you are
aluebs-webrtc
2016/05/26 01:04:45
What is wrong with it?
peah-webrtc
2016/05/26 08:48:52
It is not a proper sentence, and not terminated by
aluebs-webrtc
2016/05/28 03:00:00
I think the static definition is clearer like this
peah-webrtc
2016/05/30 11:49:25
I think the guidelines should be applied regardles
aluebs-webrtc
2016/06/01 00:16:34
Acknowledged.
|
| const size_t NonlinearBeamformer::kNumFreqBins; |
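For context on the "// static" line under discussion: it marks the out-of-line definition of a static const integral member that is declared and initialized in the header. Pre-C++17, such a member still needs exactly one definition in a .cc file once it is odr-used (for example, bound to a reference inside RTC_CHECK_EQ). A minimal sketch, with the in-class initializer value assumed here for illustration:

    // nonlinear_beamformer.h (initializer value assumed for illustration):
    class NonlinearBeamformer /* ... */ {
     public:
      static const size_t kNumFreqBins = kFftSize / 2 + 1;  // declaration + in-class initializer
      // ...
    };

    // nonlinear_beamformer.cc:
    // static
    const size_t NonlinearBeamformer::kNumFreqBins;  // definition; initializer not repeated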
| +class PostFilterTransform : public LappedTransform::Callback { |
|
peah-webrtc
2016/05/22 21:06:48
This class is a way to be able to use the callback
aluebs-webrtc
2016/05/26 01:04:45
I don't see how this simplifies the code, but I ag
peah-webrtc
2016/05/26 08:48:52
The simplification is separation of concerns and d
aluebs-webrtc
2016/05/28 03:00:00
I think the separation/encapsulation is almost the
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
|
| + public: |
| + explicit PostFilterTransform(NonlinearBeamformer* beamformer) |
| + : beamformer_(beamformer) {} |
| + |
| + protected: |
| + // Process one frequency-domain block of audio. This is where the fun |
|
peah-webrtc
2016/05/22 21:06:48
Please describe this more thoroughly. I'm not sure
aluebs-webrtc
2016/05/26 01:04:45
Removed comment. It was just to be consistent with
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
|
| + // happens. Implements LappedTransform::Callback. |
| + void ProcessAudioBlock(const complex<float>* const* input, |
| + size_t num_input_channels, |
| + size_t num_freq_bins, |
| + size_t num_output_channels, |
| + complex<float>* const* output) override { |
| + RTC_CHECK_EQ(NonlinearBeamformer::kNumFreqBins, num_freq_bins); |
| + RTC_CHECK_EQ(1u, num_input_channels); |
| + RTC_CHECK_EQ(1u, num_output_channels); |
| + |
| + beamformer_->ApplyPostFilter(input[0], output[0]); |
| + } |
| + |
| + private: |
| + NonlinearBeamformer* beamformer_; |
| +}; |
| + |
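A minimal sketch of how this callback class gets driven, based only on the LappedTransform constructor and ProcessChunk() calls that appear later in this diff (the local variable names are illustrative, not part of the CL):

    PostFilterTransform post_filter(this);
    LappedTransform transform(1u, 1u, chunk_length_, window_, kFftSize,
                              kFftSize / 2, &post_filter);
    // ProcessChunk() windows the input into overlapping kFftSize blocks,
    // transforms each block, and calls post_filter.ProcessAudioBlock() with
    // kNumFreqBins frequency-domain samples per block, then overlap-adds the
    // inverse transform into the output.
    transform.ProcessChunk(time_domain_in, time_domain_out);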
| NonlinearBeamformer::NonlinearBeamformer( |
| const std::vector<Point>& array_geometry, |
| SphericalPointf target_direction) |
| @@ -208,13 +232,16 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { |
| hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |
| interference_blocks_count_ = hold_target_blocks_; |
| - lapped_transform_.reset(new LappedTransform(num_input_channels_, |
| - 1, |
| - chunk_length_, |
| - window_, |
| - kFftSize, |
| - kFftSize / 2, |
| - this)); |
| + process_transform_.reset(new LappedTransform(num_input_channels_, |
| + 1u, |
| + chunk_length_, |
| + window_, |
| + kFftSize, |
| + kFftSize / 2, |
| + this)); |
| + postfilter_transform_.reset(new LappedTransform( |
| + 1u, 1u, chunk_length_, window_, kFftSize, kFftSize / 2, |
| + new PostFilterTransform(this))); |
| for (size_t i = 0; i < kNumFreqBins; ++i) { |
| time_smooth_mask_[i] = 1.f; |
| final_mask_[i] = 1.f; |
| @@ -371,18 +398,31 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, |
| RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); |
| RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
| - float old_high_pass_mask = high_pass_postfilter_mask_; |
| - lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| + old_high_pass_mask_ = high_pass_postfilter_mask_; |
| + process_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| + // Copy over only the first channel of each band. |
|
peah-webrtc
2016/05/22 21:06:48
Have you checked the impact on the signal when thi
aluebs-webrtc
2016/05/26 01:04:45
This is no longer relevant, since we decided offli
peah-webrtc
2016/05/26 08:48:52
I think the perfect reconstruction may actually be
aluebs-webrtc
2016/05/28 03:00:00
I meant that other components are already non-line
peah-webrtc
2016/05/30 11:49:25
Acknowledged.
|
| + // This can be done because the effect of the linear beamformer is negligible |
| + // compared to the post-filter. |
| + for (size_t i = 1; i < input.num_bands(); ++i) { |
| + memcpy(output->channels(i)[0], |
|
peah-webrtc
2016/05/22 21:06:48
what happens if the output is dual channel? Since
aluebs-webrtc
2016/05/26 01:04:45
This can't be done as is, since the input and outp
peah-webrtc
2016/05/30 11:49:25
So this means that the input could have 2 channels
aluebs-webrtc
2016/06/01 00:16:34
I don't think that adding an additional interface
peah-webrtc
2016/06/01 14:51:01
It actually does not add code complexity, as it se
aluebs-webrtc
2016/06/01 22:13:20
Interface removed and changed to take input only.
|
| + input.channels(i)[0], |
| + input.num_frames_per_band() * sizeof(output->channels(i)[0][0])); |
| + } |
| +} |
| + |
| +void NonlinearBeamformer::PostFilter(const ChannelBuffer<float>& input, |
| + ChannelBuffer<float>* output) { |
| + RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); |
| + |
| + postfilter_transform_->ProcessChunk(input.channels(0), output->channels(0)); |
| + |
| // Ramp up/down for smoothing. 1 mask per 10ms results in audible |
|
peah-webrtc
2016/05/22 21:06:48
I guess, what you mean is that smoothing is needed
aluebs-webrtc
2016/05/26 01:04:45
Adds unrelated changes to the CL, but if you think
peah-webrtc
2016/05/30 11:49:25
Sounds awesome!
|
| // discontinuities. |
| const float ramp_increment = |
| - (high_pass_postfilter_mask_ - old_high_pass_mask) / |
| + (high_pass_postfilter_mask_ - old_high_pass_mask_) / |
| input.num_frames_per_band(); |
| - // Apply the smoothed high-pass mask to the first channel of each band. |
| - // This can be done because the effect of the linear beamformer is negligible |
| - // compared to the post-filter. |
| for (size_t i = 1; i < input.num_bands(); ++i) { |
| - float smoothed_mask = old_high_pass_mask; |
| + float smoothed_mask = old_high_pass_mask_; |
| for (size_t j = 0; j < input.num_frames_per_band(); ++j) { |
| smoothed_mask += ramp_increment; |
| output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask; |
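For reference, the ramp applied above is equivalent to the following per-sample gain (a standalone sketch with a hypothetical helper name, not code from the CL):

    // gain(j) = old_mask + (j + 1) * (new_mask - old_mask) / num_frames,
    // i.e. a linear fade from the previous chunk's high-pass mask to the
    // current one across one band, instead of a single step every 10 ms.
    float RampedHighPassMask(float old_mask, float new_mask, size_t j,
                             size_t num_frames) {
      const float ramp_increment = (new_mask - old_mask) / num_frames;
      return old_mask + (j + 1) * ramp_increment;
    }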
| @@ -456,7 +496,7 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, |
| ApplyLowFrequencyCorrection(); |
| ApplyHighFrequencyCorrection(); |
| ApplyMaskFrequencySmoothing(); |
| - ApplyMasks(input, output); |
| + ApplyDelayAndSum(input, output); |
| } |
| float NonlinearBeamformer::CalculatePostfilterMask( |
| @@ -484,8 +524,8 @@ float NonlinearBeamformer::CalculatePostfilterMask( |
| return numerator / denominator; |
| } |
| -void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
| - complex_f* const* output) { |
| +void NonlinearBeamformer::ApplyDelayAndSum(const complex_f* const* input, |
| + complex_f* const* output) { |
| complex_f* output_channel = output[0]; |
| for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| output_channel[f_ix] = complex_f(0.f, 0.f); |
| @@ -495,8 +535,13 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, |
| for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { |
| output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; |
| } |
| + } |
| +} |
| - output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix]; |
| +void NonlinearBeamformer::ApplyPostFilter(const complex_f* input, |
| + complex_f* output) { |
| + for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { |
| + output[f_ix] = kCompensationGain * final_mask_[f_ix] * input[f_ix]; |
| } |
| } |
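Net effect of splitting the old ApplyMasks() in two, summarized per frequency bin f (a summary sketch using the names visible in this diff; the delay-and-sum weights are written generically):

    // First transform (ProcessAudioBlock -> ApplyDelayAndSum):
    //   beamformed[f] = sum over channels c of input[c][f] * w[c][f],
    //   where w[c][f] are the per-channel delay-and-sum weights for bin f.
    // Second transform (PostFilterTransform -> ApplyPostFilter):
    //   output[f] = kCompensationGain * final_mask_[f] * beamformed[f]
    // Previously both steps ran in one pass inside a single LappedTransform;
    // the split lets the post-filter run in its own single-channel transform
    // over the beamformed signal.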