webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h - Issue 2110503002: Revert "Pull out the PostFilter to its own NonlinearBeamformer API"

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 2110503002: Revert "Pull out the PostFilter to its own NonlinearBeamformer API" (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

13	13

14 // MSVC++ requires this to be set before any other includes to get M_PI.	14 // MSVC++ requires this to be set before any other includes to get M_PI.

15 #define _USE_MATH_DEFINES	15 #define _USE_MATH_DEFINES

16	16

17 #include <math.h>	17 #include <math.h>

18	18

19 #include <memory>	19 #include <memory>

20 #include <vector>	20 #include <vector>

21	21

22 #include "webrtc/common_audio/lapped_transform.h"	22 #include "webrtc/common_audio/lapped_transform.h"

23 #include "webrtc/common_audio/channel_buffer.h"	23 #include "webrtc/common_audio/channel_buffer.h"

24 #include "webrtc/modules/audio_processing/beamformer/array_util.h"	24 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"

25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"	25 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

26	26

27 namespace webrtc {	27 namespace webrtc {

28	28

29 class PostFilterTransform : public LappedTransform::Callback {

30 public:

31 PostFilterTransform(size_t num_channels,

32 size_t chunk_length,

33 float* window,

34 size_t fft_size);

35

36 void ProcessChunk(float* const* data, float* final_mask);

37

38 protected:

39 void ProcessAudioBlock(const complex<float>* const* input,

40 size_t num_input_channels,

41 size_t num_freq_bins,

42 size_t num_output_channels,

43 complex<float>* const* output) override;

44

45 private:

46 LappedTransform transform_;

47 const size_t num_freq_bins_;

48 float* final_mask_;

49 };

50

51 // Enhances sound sources coming directly in front of a uniform linear array	29 // Enhances sound sources coming directly in front of a uniform linear array

52 // and suppresses sound sources coming from all other directions. Operates on	30 // and suppresses sound sources coming from all other directions. Operates on

53 // multichannel signals and produces single-channel output.	31 // multichannel signals and produces single-channel output.

54 //	32 //

55 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear	33 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear

56 // Beamforming Postprocessor" by Bastiaan Kleijn.	34 // Beamforming Postprocessor" by Bastiaan Kleijn.

57 class NonlinearBeamformer : public LappedTransform::Callback {	35 class NonlinearBeamformer

	36 : public Beamformer<float>,

	37 public LappedTransform::Callback {

58 public:	38 public:

59 static const float kHalfBeamWidthRadians;	39 static const float kHalfBeamWidthRadians;

60	40

61 explicit NonlinearBeamformer(	41 explicit NonlinearBeamformer(

62 const std::vector<Point>& array_geometry,	42 const std::vector<Point>& array_geometry,

63 size_t num_postfilter_channels,

64 SphericalPointf target_direction =	43 SphericalPointf target_direction =

65 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));	44 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));

66	45

67 // Sample rate corresponds to the lower band.	46 // Sample rate corresponds to the lower band.

68 // Needs to be called before the NonlinearBeamformer can be used.	47 // Needs to be called before the NonlinearBeamformer can be used.

69 virtual void Initialize(int chunk_size_ms, int sample_rate_hz);	48 void Initialize(int chunk_size_ms, int sample_rate_hz) override;

70	49

71 // Analyzes one time-domain chunk of audio. The audio is expected to be split	50 // Process one time-domain chunk of audio. The audio is expected to be split

72 // into frequency bands inside the ChannelBuffer. The number of frames and	51 // into frequency bands inside the ChannelBuffer. The number of frames and

73 // channels must correspond to the constructor parameters.	52 // channels must correspond to the constructor parameters. The same

74 virtual void AnalyzeChunk(const ChannelBuffer<float>& data);	53 // ChannelBuffer can be passed in as \|input\| and \|output\|.

	54 void ProcessChunk(const ChannelBuffer<float>& input,

	55 ChannelBuffer<float>* output) override;

75	56

76 // Applies the postfilter mask to one chunk of audio. The audio is expected to	57 void AimAt(const SphericalPointf& target_direction) override;

77 // be split into frequency bands inside the ChannelBuffer. The number of

78 // frames and channels must correspond to the constructor parameters.

79 virtual void PostFilter(ChannelBuffer<float>* data);

80	58

81 virtual void AimAt(const SphericalPointf& target_direction);	59 bool IsInBeam(const SphericalPointf& spherical_point) override;

82

83 virtual bool IsInBeam(const SphericalPointf& spherical_point);

84	60

85 // After processing each block \|is_target_present_\| is set to true if the	61 // After processing each block \|is_target_present_\| is set to true if the

85 // target signal is present and to false otherwise. This method can be called	62 // target signal is present and to false otherwise. This method can be called

87 // to know if the data is target signal or interference and process it	63 // to know if the data is target signal or interference and process it

88 // accordingly.	64 // accordingly.

89 virtual bool is_target_present() { return is_target_present_; }	65 bool is_target_present() override { return is_target_present_; }

90	66

91 protected:	67 protected:

92 // Process one frequency-domain block of audio. This is where the fun	68 // Process one frequency-domain block of audio. This is where the fun

93 // happens. Implements LappedTransform::Callback.	69 // happens. Implements LappedTransform::Callback.

94 void ProcessAudioBlock(const complex<float>* const* input,	70 void ProcessAudioBlock(const complex<float>* const* input,

95 size_t num_input_channels,	71 size_t num_input_channels,

96 size_t num_freq_bins,	72 size_t num_freq_bins,

97 size_t num_output_channels,	73 size_t num_output_channels,

98 complex<float>* const* output) override;	74 complex<float>* const* output) override;

99	75

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
133 // Postfilter masks are also unreliable at high frequencies. Average mid-high	109 // Postfilter masks are also unreliable at high frequencies. Average mid-high

134 // frequency masks to calculate a single mask per block which can be applied	110 // frequency masks to calculate a single mask per block which can be applied

135 // in the time-domain. Further, we average these block-masks over a chunk,	111 // in the time-domain. Further, we average these block-masks over a chunk,

136 // resulting in one postfilter mask per audio chunk. This allows us to skip	112 // resulting in one postfilter mask per audio chunk. This allows us to skip

137 // both transforming and blocking the high-frequency signal.	113 // both transforming and blocking the high-frequency signal.

138 void ApplyHighFrequencyCorrection();	114 void ApplyHighFrequencyCorrection();

139	115

140 // Compute the means needed for the above frequency correction.	116 // Compute the means needed for the above frequency correction.

141 float MaskRangeMean(size_t start_bin, size_t end_bin);	117 float MaskRangeMean(size_t start_bin, size_t end_bin);

142	118

143 // Applies post-filter mask to \|input\| and stores in \|output\|.	119 // Applies both sets of masks to \|input\| and stores in \|output\|.

144 void ApplyPostFilter(const complex_f* input, complex_f* output);	120 void ApplyMasks(const complex_f* const* input, complex_f* const* output);

145	121

146 void EstimateTargetPresence();	122 void EstimateTargetPresence();

147	123

148 static const size_t kFftSize = 256;	124 static const size_t kFftSize = 256;

149 static const size_t kNumFreqBins = kFftSize / 2 + 1;	125 static const size_t kNumFreqBins = kFftSize / 2 + 1;

150	126

151 // Deals with the fft transform and blocking.	127 // Deals with the fft transform and blocking.

152 size_t chunk_length_;	128 size_t chunk_length_;

153 std::unique_ptr<LappedTransform> process_transform_;	129 std::unique_ptr<LappedTransform> lapped_transform_;

154 std::unique_ptr<PostFilterTransform> postfilter_transform_;

155 float window_[kFftSize];	130 float window_[kFftSize];

156	131

157 // Parameters exposed to the user.	132 // Parameters exposed to the user.

158 const size_t num_input_channels_;	133 const size_t num_input_channels_;

159 const size_t num_postfilter_channels_;

160 int sample_rate_hz_;	134 int sample_rate_hz_;

161	135

162 const std::vector<Point> array_geometry_;	136 const std::vector<Point> array_geometry_;

163 // The normal direction of the array if it has one and it is in the xy-plane.	137 // The normal direction of the array if it has one and it is in the xy-plane.

164 const rtc::Optional<Point> array_normal_;	138 const rtc::Optional<Point> array_normal_;

165	139

166 // Minimum spacing between microphone pairs.	140 // Minimum spacing between microphone pairs.

167 const float min_mic_spacing_;	141 const float min_mic_spacing_;

168	142

169 // Calculated based on user-input and constants in the .cc file.	143 // Calculated based on user-input and constants in the .cc file.

(...skipping 10 matching lines...) Expand all Loading...
180 float final_mask_[kNumFreqBins];	154 float final_mask_[kNumFreqBins];

181	155

182 float target_angle_radians_;	156 float target_angle_radians_;

183 // Angles of the interferer scenarios.	157 // Angles of the interferer scenarios.

184 std::vector<float> interf_angles_radians_;	158 std::vector<float> interf_angles_radians_;

185 // The angle between the target and the interferer scenarios.	159 // The angle between the target and the interferer scenarios.

186 const float away_radians_;	160 const float away_radians_;

187	161

188 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_input_channels_\|.	162 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_input_channels_\|.

189 ComplexMatrixF delay_sum_masks_[kNumFreqBins];	163 ComplexMatrixF delay_sum_masks_[kNumFreqBins];

	164 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

190	165

191 // Arrays of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	166 // Arrays of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

192 // \|num_input_channels_\|.	167 // \|num_input_channels_\|.

193 ComplexMatrixF target_cov_mats_[kNumFreqBins];	168 ComplexMatrixF target_cov_mats_[kNumFreqBins];

194 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];	169 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];

195 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	170 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

196 // \|num_input_channels_\|. The vector has a size equal to the number of	171 // \|num_input_channels_\|. The vector has a size equal to the number of

197 // interferer scenarios.	172 // interferer scenarios.

198 std::vector<std::unique_ptr<ComplexMatrixF>> interf_cov_mats_[kNumFreqBins];	173 std::vector<std::unique_ptr<ComplexMatrixF>> interf_cov_mats_[kNumFreqBins];

199	174

200 // Of length \|kNumFreqBins\|.	175 // Of length \|kNumFreqBins\|.

201 float wave_numbers_[kNumFreqBins];	176 float wave_numbers_[kNumFreqBins];

202	177

203 // Preallocated for ProcessAudioBlock()	178 // Preallocated for ProcessAudioBlock()

204 // Of length \|kNumFreqBins\|.	179 // Of length \|kNumFreqBins\|.

205 float rxiws_[kNumFreqBins];	180 float rxiws_[kNumFreqBins];

206 // The vector has a size equal to the number of interferer scenarios.	181 // The vector has a size equal to the number of interferer scenarios.

207 std::vector<float> rpsiws_[kNumFreqBins];	182 std::vector<float> rpsiws_[kNumFreqBins];

208	183

209 // The microphone normalization factor.	184 // The microphone normalization factor.

210 ComplexMatrixF eig_m_;	185 ComplexMatrixF eig_m_;

211	186

212 // For processing the high-frequency input signal.	187 // For processing the high-frequency input signal.

213 float high_pass_postfilter_mask_;	188 float high_pass_postfilter_mask_;

214 float old_high_pass_mask_;

215	189

216 // True when the target signal is present.	190 // True when the target signal is present.

217 bool is_target_present_;	191 bool is_target_present_;

218 // Number of blocks after which the data is considered interference if the	192 // Number of blocks after which the data is considered interference if the

219 // mask does not pass \|kMaskSignalThreshold\|.	193 // mask does not pass \|kMaskSignalThreshold\|.

220 size_t hold_target_blocks_;	194 size_t hold_target_blocks_;

221 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.	195 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.

222 size_t interference_blocks_count_;	196 size_t interference_blocks_count_;

223 };	197 };

224	198

225 } // namespace webrtc	199 } // namespace webrtc

226	200

227 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	201 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

OLD	NEW