Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(667)

Unified Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
index f5bdd6a3c2d0308bea77074301f6d8f688c89ead..b5d5197edc0e6218c548368e49d27e3cddaabbe4 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -183,6 +183,30 @@ const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
// static
peah-webrtc 2016/05/22 21:06:48 Please correct this comment as well while you are
aluebs-webrtc 2016/05/26 01:04:45 What is wrong with it?
peah-webrtc 2016/05/26 08:48:52 It is not a proper sentence, and not terminated by
aluebs-webrtc 2016/05/28 03:00:00 I think the static definition is clearer like this
peah-webrtc 2016/05/30 11:49:25 I think the guidelines should be applied regardles
aluebs-webrtc 2016/06/01 00:16:34 Acknowledged.
const size_t NonlinearBeamformer::kNumFreqBins;
+class PostFilterTransform : public LappedTransform::Callback {
peah-webrtc 2016/05/22 21:06:48 This class is a way to be able to use the callback
aluebs-webrtc 2016/05/26 01:04:45 I don't see how this simplifies the code, but I ag
peah-webrtc 2016/05/26 08:48:52 The simplification is separation of concerns and d
aluebs-webrtc 2016/05/28 03:00:00 I think the separation/encapsulation is almost the
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
+ public:
+ explicit PostFilterTransform(NonlinearBeamformer* beamformer)
+ : beamformer_(beamformer) {}
+
+ protected:
+ // Process one frequency-domain block of audio. This is where the fun
peah-webrtc 2016/05/22 21:06:48 Please describe this more thoroughly. I'm not sure
aluebs-webrtc 2016/05/26 01:04:45 Removed comment. It was just to be consistent with
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
+ // happens. Implements LappedTransform::Callback.
+ void ProcessAudioBlock(const complex<float>* const* input,
+ size_t num_input_channels,
+ size_t num_freq_bins,
+ size_t num_output_channels,
+ complex<float>* const* output) override {
+ RTC_CHECK_EQ(NonlinearBeamformer::kNumFreqBins, num_freq_bins);
+ RTC_CHECK_EQ(1u, num_input_channels);
+ RTC_CHECK_EQ(1u, num_output_channels);
+
+ beamformer_->ApplyPostFilter(input[0], output[0]);
+ }
+
+ private:
+ NonlinearBeamformer* beamformer_;
+};
+
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry,
SphericalPointf target_direction)
@@ -208,13 +232,16 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
- lapped_transform_.reset(new LappedTransform(num_input_channels_,
- 1,
- chunk_length_,
- window_,
- kFftSize,
- kFftSize / 2,
- this));
+ process_transform_.reset(new LappedTransform(num_input_channels_,
+ 1u,
+ chunk_length_,
+ window_,
+ kFftSize,
+ kFftSize / 2,
+ this));
+ postfilter_transform_.reset(new LappedTransform(
+ 1u, 1u, chunk_length_, window_, kFftSize, kFftSize / 2,
+ new PostFilterTransform(this)));
for (size_t i = 0; i < kNumFreqBins; ++i) {
time_smooth_mask_[i] = 1.f;
final_mask_[i] = 1.f;
@@ -371,18 +398,31 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
- float old_high_pass_mask = high_pass_postfilter_mask_;
- lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
+ old_high_pass_mask_ = high_pass_postfilter_mask_;
+ process_transform_->ProcessChunk(input.channels(0), output->channels(0));
+ // Copy over only the first channel of each band.
peah-webrtc 2016/05/22 21:06:48 Have you checked the impact on the signal when thi
aluebs-webrtc 2016/05/26 01:04:45 This is no longer relevant, since we decided offli
peah-webrtc 2016/05/26 08:48:52 I think the perfect reconstruction may actually be
aluebs-webrtc 2016/05/28 03:00:00 I meant that other components are already non-line
peah-webrtc 2016/05/30 11:49:25 Acknowledged.
+ // This can be done because the effect of the linear beamformer is negligible
+ // compared to the post-filter.
+ for (size_t i = 1; i < input.num_bands(); ++i) {
+ memcpy(output->channels(i)[0],
peah-webrtc 2016/05/22 21:06:48 what happens if the output is dual channel? Since
aluebs-webrtc 2016/05/26 01:04:45 This can't be done as is, since the input and outp
peah-webrtc 2016/05/30 11:49:25 So this means that the input could have 2 channels
aluebs-webrtc 2016/06/01 00:16:34 I don't think that adding an additional interface
peah-webrtc 2016/06/01 14:51:01 It actually does not add code complexity, as it se
aluebs-webrtc 2016/06/01 22:13:20 Interface removed and changed to input-only.
+ input.channels(i)[0],
+ input.num_frames_per_band() * sizeof(output->channels(i)[0][0]));
+ }
+}
+
+void NonlinearBeamformer::PostFilter(const ChannelBuffer<float>& input,
+ ChannelBuffer<float>* output) {
+ RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
+
+ postfilter_transform_->ProcessChunk(input.channels(0), output->channels(0));
+
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
peah-webrtc 2016/05/22 21:06:48 I guess, what you mean is that smoothing is needed
aluebs-webrtc 2016/05/26 01:04:45 Adds unrelated changes to the CL, but if you think
peah-webrtc 2016/05/30 11:49:25 Sounds awesome!
// discontinuities.
const float ramp_increment =
- (high_pass_postfilter_mask_ - old_high_pass_mask) /
+ (high_pass_postfilter_mask_ - old_high_pass_mask_) /
input.num_frames_per_band();
- // Apply the smoothed high-pass mask to the first channel of each band.
- // This can be done because the effect of the linear beamformer is negligible
- // compared to the post-filter.
for (size_t i = 1; i < input.num_bands(); ++i) {
- float smoothed_mask = old_high_pass_mask;
+ float smoothed_mask = old_high_pass_mask_;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
@@ -456,7 +496,7 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
ApplyMaskFrequencySmoothing();
- ApplyMasks(input, output);
+ ApplyDelayAndSum(input, output);
}
float NonlinearBeamformer::CalculatePostfilterMask(
@@ -484,8 +524,8 @@ float NonlinearBeamformer::CalculatePostfilterMask(
return numerator / denominator;
}
-void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
- complex_f* const* output) {
+void NonlinearBeamformer::ApplyDelayAndSum(const complex_f* const* input,
+ complex_f* const* output) {
complex_f* output_channel = output[0];
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
output_channel[f_ix] = complex_f(0.f, 0.f);
@@ -495,8 +535,13 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
+ }
+}
- output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
+void NonlinearBeamformer::ApplyPostFilter(const complex_f* input,
+ complex_f* output) {
+ for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
+ output[f_ix] = kCompensationGain * final_mask_[f_ix] * input[f_ix];
}
}

Powered by Google App Engine
This is Rietveld 408576698