Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(262)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
13 13
14 // MSVC++ requires this to be set before any other includes to get M_PI. 14 // MSVC++ requires this to be set before any other includes to get M_PI.
15 #define _USE_MATH_DEFINES 15 #define _USE_MATH_DEFINES
16 16
17 #include <math.h> 17 #include <math.h>
18 18
19 #include <memory> 19 #include <memory>
20 #include <vector> 20 #include <vector>
21 21
22 #include "webrtc/common_audio/lapped_transform.h" 22 #include "webrtc/common_audio/lapped_transform.h"
23 #include "webrtc/common_audio/channel_buffer.h" 23 #include "webrtc/common_audio/channel_buffer.h"
24 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" 24 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"
25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" 25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
26 26
27 namespace webrtc { 27 namespace webrtc {
28 28
29 class PostFilterTransform;
30
29 // Enhances sound sources coming directly in front of a uniform linear array 31 // Enhances sound sources coming directly in front of a uniform linear array
30 // and suppresses sound sources coming from all other directions. Operates on 32 // and suppresses sound sources coming from all other directions. Operates on
31 // multichannel signals and produces single-channel output. 33 // multichannel signals and produces single-channel output.
32 // 34 //
33 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear 35 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear
34 // Beamforming Postprocessor" by Bastiaan Kleijn. 36 // Beamforming Postprocessor" by Bastiaan Kleijn.
35 class NonlinearBeamformer 37 class NonlinearBeamformer
36 : public Beamformer<float>, 38 : public Beamformer<float>,
37 public LappedTransform::Callback { 39 public LappedTransform::Callback {
40 friend class PostFilterTransform;
38 public: 41 public:
39 static const float kHalfBeamWidthRadians; 42 static const float kHalfBeamWidthRadians;
40 43
41 explicit NonlinearBeamformer( 44 explicit NonlinearBeamformer(
42 const std::vector<Point>& array_geometry, 45 const std::vector<Point>& array_geometry,
43 SphericalPointf target_direction = 46 SphericalPointf target_direction =
44 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)); 47 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
45 48
46 // Sample rate corresponds to the lower band. 49 // Sample rate corresponds to the lower band.
47 // Needs to be called before the NonlinearBeamformer can be used. 50 // Needs to be called before the NonlinearBeamformer can be used.
48 void Initialize(int chunk_size_ms, int sample_rate_hz) override; 51 void Initialize(int chunk_size_ms, int sample_rate_hz) override;
49 52
50 // Process one time-domain chunk of audio. The audio is expected to be split 53 // Process one time-domain chunk of audio. The audio is expected to be split
51 // into frequency bands inside the ChannelBuffer. The number of frames and 54 // into frequency bands inside the ChannelBuffer. The number of frames and
52 // channels must correspond to the constructor parameters. The same 55 // channels must correspond to the constructor parameters. The same
53 // ChannelBuffer can be passed in as |input| and |output|. 56 // ChannelBuffer can be passed in as |input| and |output|.
54 void ProcessChunk(const ChannelBuffer<float>& input, 57 void ProcessChunk(const ChannelBuffer<float>& input,
55 ChannelBuffer<float>* output) override; 58 ChannelBuffer<float>* output) override;
59 // Applies the postfilter mask to one chunk of audio. The audio is expected to
peah-webrtc 2016/05/22 21:06:48 I think that this description is longer than requi
aluebs-webrtc 2016/05/26 01:04:45 I personally think adding more documentation than
peah-webrtc 2016/05/26 08:48:52 Great!
60 // be split into frequency bands inside the ChannelBuffer. The number of
61 // frames must correspond to the constructor parameters and the number of
62 // channels is expected to be 1, since that is the output number of channels
63 // of ProcessChunk(). The same ChannelBuffer can be passed in as |input| and
64 // |output|.
65 void PostFilter(const ChannelBuffer<float>& input,
66 ChannelBuffer<float>* output) override;
56 67
57 void AimAt(const SphericalPointf& target_direction) override; 68 void AimAt(const SphericalPointf& target_direction) override;
58 69
59 bool IsInBeam(const SphericalPointf& spherical_point) override; 70 bool IsInBeam(const SphericalPointf& spherical_point) override;
60 71
61 // After processing each block |is_target_present_| is set to true if the 72 // After processing each block |is_target_present_| is set to true if the
62 // target signal is present and to false otherwise. This method can be called 73 // target signal is present and to false otherwise. This method can be called
63 // to know if the data is target signal or interference and process it 74 // to know if the data is target signal or interference and process it
64 // accordingly. 75 // accordingly.
65 bool is_target_present() override { return is_target_present_; } 76 bool is_target_present() override { return is_target_present_; }
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 // Postfilter masks are also unreliable at high frequencies. Average mid-high 120 // Postfilter masks are also unreliable at high frequencies. Average mid-high
110 // frequency masks to calculate a single mask per block which can be applied 121 // frequency masks to calculate a single mask per block which can be applied
111 // in the time-domain. Further, we average these block-masks over a chunk, 122 // in the time-domain. Further, we average these block-masks over a chunk,
112 // resulting in one postfilter mask per audio chunk. This allows us to skip 123 // resulting in one postfilter mask per audio chunk. This allows us to skip
113 // both transforming and blocking the high-frequency signal. 124 // both transforming and blocking the high-frequency signal.
114 void ApplyHighFrequencyCorrection(); 125 void ApplyHighFrequencyCorrection();
115 126
116 // Compute the means needed for the above frequency correction. 127 // Compute the means needed for the above frequency correction.
117 float MaskRangeMean(size_t start_bin, size_t end_bin); 128 float MaskRangeMean(size_t start_bin, size_t end_bin);
118 129
119 // Applies both sets of masks to |input| and stores the result in |output|. 130 // Applies delay-and-sum mask to |input| and stores the result in |output|.
120 void ApplyMasks(const complex_f* const* input, complex_f* const* output); 131 void ApplyDelayAndSum(const complex_f* const* input,
132 complex_f* const* output);
133 // Applies post-filter mask to |input| and stores the result in |output|.
134 void ApplyPostFilter(const complex_f* input, complex_f* output);
121 135
122 void EstimateTargetPresence(); 136 void EstimateTargetPresence();
123 137
124 static const size_t kFftSize = 256; 138 static const size_t kFftSize = 256;
125 static const size_t kNumFreqBins = kFftSize / 2 + 1; 139 static const size_t kNumFreqBins = kFftSize / 2 + 1;
126 140
127 // Deals with the fft transform and blocking. 141 // Deals with the fft transform and blocking.
128 size_t chunk_length_; 142 size_t chunk_length_;
129 std::unique_ptr<LappedTransform> lapped_transform_; 143 std::unique_ptr<LappedTransform> process_transform_;
144 std::unique_ptr<LappedTransform> postfilter_transform_;
130 float window_[kFftSize]; 145 float window_[kFftSize];
131 146
132 // Parameters exposed to the user. 147 // Parameters exposed to the user.
133 const size_t num_input_channels_; 148 const size_t num_input_channels_;
134 int sample_rate_hz_; 149 int sample_rate_hz_;
135 150
136 const std::vector<Point> array_geometry_; 151 const std::vector<Point> array_geometry_;
137 // The normal direction of the array if it has one and it is in the xy-plane. 152 // The normal direction of the array if it has one and it is in the xy-plane.
138 const rtc::Optional<Point> array_normal_; 153 const rtc::Optional<Point> array_normal_;
139 154
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 // Of length |kNumFreqBins|. 194 // Of length |kNumFreqBins|.
180 float rxiws_[kNumFreqBins]; 195 float rxiws_[kNumFreqBins];
181 // The vector has a size equal to the number of interferer scenarios. 196 // The vector has a size equal to the number of interferer scenarios.
182 std::vector<float> rpsiws_[kNumFreqBins]; 197 std::vector<float> rpsiws_[kNumFreqBins];
183 198
184 // The microphone normalization factor. 199 // The microphone normalization factor.
185 ComplexMatrixF eig_m_; 200 ComplexMatrixF eig_m_;
186 201
187 // For processing the high-frequency input signal. 202 // For processing the high-frequency input signal.
188 float high_pass_postfilter_mask_; 203 float high_pass_postfilter_mask_;
204 float old_high_pass_mask_;
189 205
190 // True when the target signal is present. 206 // True when the target signal is present.
191 bool is_target_present_; 207 bool is_target_present_;
192 // Number of blocks after which the data is considered interference if the 208 // Number of blocks after which the data is considered interference if the
193 // mask does not pass |kMaskSignalThreshold|. 209 // mask does not pass |kMaskSignalThreshold|.
194 size_t hold_target_blocks_; 210 size_t hold_target_blocks_;
195 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. 211 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
196 size_t interference_blocks_count_; 212 size_t interference_blocks_count_;
197 }; 213 };
198 214
199 } // namespace webrtc 215 } // namespace webrtc
200 216
201 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 217 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698