webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h - Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1982183002: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Remove ChannelBuffer fix Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/audio_processing_impl.cc ('K') | « webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

13	13

14 // MSVC++ requires this to be set before any other includes to get M_PI.	14 // MSVC++ requires this to be set before any other includes to get M_PI.

15 #define _USE_MATH_DEFINES	15 #define _USE_MATH_DEFINES

16	16

17 #include <math.h>	17 #include <math.h>

18	18

19 #include <memory>	19 #include <memory>

20 #include <vector>	20 #include <vector>

21	21

22 #include "webrtc/common_audio/lapped_transform.h"	22 #include "webrtc/common_audio/lapped_transform.h"

23 #include "webrtc/common_audio/channel_buffer.h"	23 #include "webrtc/common_audio/channel_buffer.h"

24 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"	24 #include "webrtc/modules/audio_processing/beamformer/array_util.h"

25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"	25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

26	26

27 namespace webrtc {	27 namespace webrtc {

28	28

	29 class PostFilterTransform : public LappedTransform::Callback {

	30 public:

	31 PostFilterTransform(size_t num_channels,

	32 size_t chunk_length,

	33 float* window,

	34 size_t fft_size);

	35

	36 void ProcessChunk(float* const* data, float* final_mask);

	37

	38 protected:

	39 void ProcessAudioBlock(const complex<float>* const* input,

	40 size_t num_input_channels,

	41 size_t num_freq_bins,

	42 size_t num_output_channels,

	43 complex<float>* const* output) override;

	44

	45 private:

	46 LappedTransform transform_;

	47 const size_t num_freq_bins_;

	48 float* final_mask_;

	49 };

	50

29 // Enhances sound sources coming directly in front of a uniform linear array	51 // Enhances sound sources coming directly in front of a uniform linear array

30 // and suppresses sound sources coming from all other directions. Operates on	52 // and suppresses sound sources coming from all other directions. Operates on

31 // multichannel signals and produces single-channel output.	53 // multichannel signals and produces single-channel output.

32 //	54 //

33 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear	55 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear

34 // Beamforming Postprocessor" by Bastiaan Kleijn.	56 // Beamforming Postprocessor" by Bastiaan Kleijn.

35 class NonlinearBeamformer	57 class NonlinearBeamformer : public LappedTransform::Callback {

36 : public Beamformer<float>,

37 public LappedTransform::Callback {

38 public:	58 public:

39 static const float kHalfBeamWidthRadians;	59 static const float kHalfBeamWidthRadians;

40	60

41 explicit NonlinearBeamformer(	61 explicit NonlinearBeamformer(

42 const std::vector<Point>& array_geometry,	62 const std::vector<Point>& array_geometry,

	63 size_t num_postfilter_channels,

43 SphericalPointf target_direction =	64 SphericalPointf target_direction =

44 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));	65 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));

45	66

46 // Sample rate corresponds to the lower band.	67 // Sample rate corresponds to the lower band.

47 // Needs to be called before the NonlinearBeamformer can be used.	68 // Needs to be called before the NonlinearBeamformer can be used.

48 void Initialize(int chunk_size_ms, int sample_rate_hz) override;	69 void Initialize(int chunk_size_ms, int sample_rate_hz);

49	70

50 // Process one time-domain chunk of audio. The audio is expected to be split	71 // Analyzes one time-domain chunk of audio. The audio is expected to be split

51 // into frequency bands inside the ChannelBuffer. The number of frames and	72 // into frequency bands inside the ChannelBuffer. The number of frames and

52 // channels must correspond to the constructor parameters. The same	73 // channels must correspond to the constructor parameters.

53 // ChannelBuffer can be passed in as \|input\| and \|output\|.	74 void AnalyzeChunk(const ChannelBuffer<float>& data);

54 void ProcessChunk(const ChannelBuffer<float>& input,	75 // Applies the postfilter mask to one chunk of audio. The audio is expected to
	peah-webrtc 2016/06/23 22:02:40 Suggestion: Since the comments are quite long, I think the code would benefit in readability if there is a space between AnalyzeChunk and the following comment. aluebs-webrtc 2016/06/24 03:00:54 On 2016/06/23 22:02:40, peah-webrtc wrote: > Suggestion: Since the comments are quite long, I think the code would benefit in > readability if there is a space between AnalyzeChunk and the following comment. Done.
55 ChannelBuffer<float>* output) override;	76 // be split into frequency bands inside the ChannelBuffer. The number of

	77 // frames must correspond to the constructor parameters and the number of

	78 // channels is expected to be 1, since that is the output number of channels
	peah-webrtc 2016/06/23 22:02:40 Please rewrite this comment as the constructor takes num_postfilter_channels as an input which means that the number of channels could be more than 1. Furthermore, the ProcessChunk method is now called AnalyzeChunk and has no outputs. aluebs-webrtc 2016/06/24 03:00:54 On 2016/06/23 22:02:40, peah-webrtc wrote: > Please rewrite this comment as the constructor takes num_postfilter_channels as > an input which means that the number of channels could be more than 1. > Furthermore, the ProcessChunk method is now called AnalyzeChunk and has no > outputs. Done.
	79 // of ProcessChunk().

	80 void PostFilter(ChannelBuffer<float>* data);

56	81

57 void AimAt(const SphericalPointf& target_direction) override;	82 void AimAt(const SphericalPointf& target_direction);

58	83

59 bool IsInBeam(const SphericalPointf& spherical_point) override;	84 bool IsInBeam(const SphericalPointf& spherical_point);

60	85

61 // After processing each block \|is_target_present_\| is set to true if the	86 // After processing each block \|is_target_present_\| is set to true if the

62 // target signal is present and to false otherwise. This method can be called	87 // target signal is present and to false otherwise. This method can be called

63 // to know if the data is target signal or interference and process it	88 // to know if the data is target signal or interference and process it

64 // accordingly.	89 // accordingly.

65 bool is_target_present() override { return is_target_present_; }	90 bool is_target_present() { return is_target_present_; }

66	91

67 protected:	92 protected:

68 // Process one frequency-domain block of audio. This is where the fun	93 // Process one frequency-domain block of audio. This is where the fun

69 // happens. Implements LappedTransform::Callback.	94 // happens. Implements LappedTransform::Callback.

70 void ProcessAudioBlock(const complex<float>* const* input,	95 void ProcessAudioBlock(const complex<float>* const* input,

71 size_t num_input_channels,	96 size_t num_input_channels,

72 size_t num_freq_bins,	97 size_t num_freq_bins,

73 size_t num_output_channels,	98 size_t num_output_channels,

74 complex<float>* const* output) override;	99 complex<float>* const* output) override;

75	100

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
109 // Postfilter masks are also unreliable at high frequencies. Average mid-high	134 // Postfilter masks are also unreliable at high frequencies. Average mid-high

110 // frequency masks to calculate a single mask per block which can be applied	135 // frequency masks to calculate a single mask per block which can be applied

111 // in the time-domain. Further, we average these block-masks over a chunk,	136 // in the time-domain. Further, we average these block-masks over a chunk,

112 // resulting in one postfilter mask per audio chunk. This allows us to skip	137 // resulting in one postfilter mask per audio chunk. This allows us to skip

113 // both transforming and blocking the high-frequency signal.	138 // both transforming and blocking the high-frequency signal.

114 void ApplyHighFrequencyCorrection();	139 void ApplyHighFrequencyCorrection();

115	140

116 // Compute the means needed for the above frequency correction.	141 // Compute the means needed for the above frequency correction.

117 float MaskRangeMean(size_t start_bin, size_t end_bin);	142 float MaskRangeMean(size_t start_bin, size_t end_bin);

118	143

119 // Applies both sets of masks to \|input\| and stores the result in \|output\|.	144 // Applies post-filter mask to \|input\| and stores the result in \|output\|.

120 void ApplyMasks(const complex_f* const* input, complex_f* const* output);	145 void ApplyPostFilter(const complex_f* input, complex_f* output);

121	146

122 void EstimateTargetPresence();	147 void EstimateTargetPresence();

123	148

124 static const size_t kFftSize = 256;	149 static const size_t kFftSize = 256;

125 static const size_t kNumFreqBins = kFftSize / 2 + 1;	150 static const size_t kNumFreqBins = kFftSize / 2 + 1;

126	151

127 // Deals with the fft transform and blocking.	152 // Deals with the fft transform and blocking.

128 size_t chunk_length_;	153 size_t chunk_length_;

129 std::unique_ptr<LappedTransform> lapped_transform_;	154 std::unique_ptr<LappedTransform> process_transform_;

	155 std::unique_ptr<PostFilterTransform> postfilter_transform_;

130 float window_[kFftSize];	156 float window_[kFftSize];

131	157

132 // Parameters exposed to the user.	158 // Parameters exposed to the user.

133 const size_t num_input_channels_;	159 const size_t num_input_channels_;

	160 const size_t num_postfilter_channels_;

134 int sample_rate_hz_;	161 int sample_rate_hz_;

135	162

136 const std::vector<Point> array_geometry_;	163 const std::vector<Point> array_geometry_;

137 // The normal direction of the array if it has one and it is in the xy-plane.	164 // The normal direction of the array if it has one and it is in the xy-plane.

138 const rtc::Optional<Point> array_normal_;	165 const rtc::Optional<Point> array_normal_;

139	166

140 // Minimum spacing between microphone pairs.	167 // Minimum spacing between microphone pairs.

141 const float min_mic_spacing_;	168 const float min_mic_spacing_;

142	169

143 // Calculated based on user-input and constants in the .cc file.	170 // Calculated based on user-input and constants in the .cc file.

(...skipping 10 matching lines...) Expand all Loading...
154 float final_mask_[kNumFreqBins];	181 float final_mask_[kNumFreqBins];

155	182

156 float target_angle_radians_;	183 float target_angle_radians_;

157 // Angles of the interferer scenarios.	184 // Angles of the interferer scenarios.

158 std::vector<float> interf_angles_radians_;	185 std::vector<float> interf_angles_radians_;

159 // The angle between the target and the interferer scenarios.	186 // The angle between the target and the interferer scenarios.

160 const float away_radians_;	187 const float away_radians_;

161	188

162 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_channels_\|.	189 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_channels_\|.

163 ComplexMatrixF delay_sum_masks_[kNumFreqBins];	190 ComplexMatrixF delay_sum_masks_[kNumFreqBins];

164 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

165	191

166 // Arrays of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	192 // Arrays of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

167 // \|num_input_channels_\|.	193 // \|num_input_channels_\|.

168 ComplexMatrixF target_cov_mats_[kNumFreqBins];	194 ComplexMatrixF target_cov_mats_[kNumFreqBins];

169 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];	195 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];

170 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	196 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

171 // \|num_input_channels_\|. The vector has a size equal to the number of	197 // \|num_input_channels_\|. The vector has a size equal to the number of

172 // interferer scenarios.	198 // interferer scenarios.

173 std::vector<std::unique_ptr<ComplexMatrixF>> interf_cov_mats_[kNumFreqBins];	199 std::vector<std::unique_ptr<ComplexMatrixF>> interf_cov_mats_[kNumFreqBins];

174	200

175 // Of length \|kNumFreqBins\|.	201 // Of length \|kNumFreqBins\|.

176 float wave_numbers_[kNumFreqBins];	202 float wave_numbers_[kNumFreqBins];

177	203

178 // Preallocated for ProcessAudioBlock()	204 // Preallocated for ProcessAudioBlock()

179 // Of length \|kNumFreqBins\|.	205 // Of length \|kNumFreqBins\|.

180 float rxiws_[kNumFreqBins];	206 float rxiws_[kNumFreqBins];

181 // The vector has a size equal to the number of interferer scenarios.	207 // The vector has a size equal to the number of interferer scenarios.

182 std::vector<float> rpsiws_[kNumFreqBins];	208 std::vector<float> rpsiws_[kNumFreqBins];

183	209

184 // The microphone normalization factor.	210 // The microphone normalization factor.

185 ComplexMatrixF eig_m_;	211 ComplexMatrixF eig_m_;

186	212

187 // For processing the high-frequency input signal.	213 // For processing the high-frequency input signal.

188 float high_pass_postfilter_mask_;	214 float high_pass_postfilter_mask_;

	215 float old_high_pass_mask_;

189	216

190 // True when the target signal is present.	217 // True when the target signal is present.

191 bool is_target_present_;	218 bool is_target_present_;

192 // Number of blocks after which the data is considered interference if the	219 // Number of blocks after which the data is considered interference if the

193 // mask does not pass \|kMaskSignalThreshold\|.	220 // mask does not pass \|kMaskSignalThreshold\|.

194 size_t hold_target_blocks_;	221 size_t hold_target_blocks_;

195 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.	222 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.

196 size_t interference_blocks_count_;	223 size_t interference_blocks_count_;

197 };	224 };

198	225

199 } // namespace webrtc	226 } // namespace webrtc

200	227

201 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	228 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

OLD	NEW