webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h - Issue 1394103003: Make the nonlinear beamformer steerable

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq

Patch Set: Fix more windows compile errors Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/beamformer/beamformer.h ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 13 matching lines...) Expand all Loading...
24 // Enhances sound sources coming directly in front of a uniform linear array	24 // Enhances sound sources coming directly in front of a uniform linear array

25 // and suppresses sound sources coming from all other directions. Operates on	25 // and suppresses sound sources coming from all other directions. Operates on

26 // multichannel signals and produces single-channel output.	26 // multichannel signals and produces single-channel output.

27 //	27 //

28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear	28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear

29 // Beamforming Postprocessor" by Bastiaan Kleijn.	29 // Beamforming Postprocessor" by Bastiaan Kleijn.

30 class NonlinearBeamformer	30 class NonlinearBeamformer

31 : public Beamformer<float>,	31 : public Beamformer<float>,

32 public LappedTransform::Callback {	32 public LappedTransform::Callback {

33 public:	33 public:

34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);	34 static const float kHalfBeamWidthRadians;

	35

	36 explicit NonlinearBeamformer(

	37 const std::vector<Point>& array_geometry,

	38 SphericalPointf target_direction = SphericalPointf(M_PI / 2.f, 0.f, 1.f));

35	39

36 // Sample rate corresponds to the lower band.	40 // Sample rate corresponds to the lower band.

37 // Needs to be called before the NonlinearBeamformer can be used.	41 // Needs to be called before the NonlinearBeamformer can be used.

38 void Initialize(int chunk_size_ms, int sample_rate_hz) override;	42 void Initialize(int chunk_size_ms, int sample_rate_hz) override;

39	43

40 // Process one time-domain chunk of audio. The audio is expected to be split	44 // Process one time-domain chunk of audio. The audio is expected to be split

41 // into frequency bands inside the ChannelBuffer. The number of frames and	45 // into frequency bands inside the ChannelBuffer. The number of frames and

42 // channels must correspond to the constructor parameters. The same	46 // channels must correspond to the constructor parameters. The same

43 // ChannelBuffer can be passed in as \|input\| and \|output\|.	47 // ChannelBuffer can be passed in as \|input\| and \|output\|.

44 void ProcessChunk(const ChannelBuffer<float>& input,	48 void ProcessChunk(const ChannelBuffer<float>& input,

45 ChannelBuffer<float>* output) override;	49 ChannelBuffer<float>* output) override;

46	50

	51 void AimAt(const SphericalPointf& target_direction) override;

	52

47 bool IsInBeam(const SphericalPointf& spherical_point) override;	53 bool IsInBeam(const SphericalPointf& spherical_point) override;

48	54

49 // After processing each block \|is_target_present_\| is set to true if the	55 // After processing each block \|is_target_present_\| is set to true if the

50 // target signal is present and to false otherwise. This method can be called	56 // target signal is present and to false otherwise. This method can be called

51 // to know if the data is target signal or interference and process it	57 // to know if the data is target signal or interference and process it

52 // accordingly.	58 // accordingly.

53 bool is_target_present() override { return is_target_present_; }	59 bool is_target_present() override { return is_target_present_; }

54	60

55 protected:	61 protected:

56 // Process one frequency-domain block of audio. This is where the fun	62 // Process one frequency-domain block of audio. This is where the fun

57 // happens. Implements LappedTransform::Callback.	63 // happens. Implements LappedTransform::Callback.

58 void ProcessAudioBlock(const complex<float>* const* input,	64 void ProcessAudioBlock(const complex<float>* const* input,

59 int num_input_channels,	65 int num_input_channels,

60 size_t num_freq_bins,	66 size_t num_freq_bins,

61 int num_output_channels,	67 int num_output_channels,

62 complex<float>* const* output) override;	68 complex<float>* const* output) override;

63	69

64 private:	70 private:

	71 FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,

	72 InterfAnglesTakeAmbiguityIntoAccount);

	73

65 typedef Matrix<float> MatrixF;	74 typedef Matrix<float> MatrixF;

66 typedef ComplexMatrix<float> ComplexMatrixF;	75 typedef ComplexMatrix<float> ComplexMatrixF;

67 typedef complex<float> complex_f;	76 typedef complex<float> complex_f;

68	77

69 void InitFrequencyCorrectionRanges();	78 void InitLowFrequencyCorrectionRanges();

	79 void InitHighFrequencyCorrectionRanges();

70 void InitInterfAngles();	80 void InitInterfAngles();

71 void InitDelaySumMasks();	81 void InitDelaySumMasks();

72 void InitTargetCovMats();	82 void InitTargetCovMats();

	83 void InitDiffuseCovMats();

73 void InitInterfCovMats();	84 void InitInterfCovMats();

	85 void NormalizeCovMats();

74	86

75 // Calculates postfilter masks that minimize the mean squared error of our	87 // Calculates postfilter masks that minimize the mean squared error of our

76 // estimation of the desired signal.	88 // estimation of the desired signal.

77 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,	89 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,

78 float rpsiw,	90 float rpsiw,

79 float ratio_rxiw_rxim,	91 float ratio_rxiw_rxim,

80 float rmxi_r);	92 float rmxi_r);

81	93

82 // Prevents the postfilter masks from degenerating too quickly (a cause of	94 // Prevents the postfilter masks from degenerating too quickly (a cause of

83 // musical noise).	95 // musical noise).

(...skipping 25 matching lines...) Expand all Loading...
109 // Deals with the fft transform and blocking.	121 // Deals with the fft transform and blocking.

110 size_t chunk_length_;	122 size_t chunk_length_;

111 rtc::scoped_ptr<LappedTransform> lapped_transform_;	123 rtc::scoped_ptr<LappedTransform> lapped_transform_;

112 float window_[kFftSize];	124 float window_[kFftSize];

113	125

114 // Parameters exposed to the user.	126 // Parameters exposed to the user.

115 const int num_input_channels_;	127 const int num_input_channels_;

116 int sample_rate_hz_;	128 int sample_rate_hz_;

117	129

118 const std::vector<Point> array_geometry_;	130 const std::vector<Point> array_geometry_;

	131 // The normal direction of the array if it has one and it is in the xy-plane.

	132 const rtc::Maybe<Point> array_normal_;

119	133

120 // Minimum spacing between microphone pairs.	134 // Minimum spacing between microphone pairs.

121 const float min_mic_spacing_;	135 const float min_mic_spacing_;

122	136

123 // Calculated based on user-input and constants in the .cc file.	137 // Calculated based on user-input and constants in the .cc file.

124 size_t low_mean_start_bin_;	138 size_t low_mean_start_bin_;

125 size_t low_mean_end_bin_;	139 size_t low_mean_end_bin_;

126 size_t high_mean_start_bin_;	140 size_t high_mean_start_bin_;

127 size_t high_mean_end_bin_;	141 size_t high_mean_end_bin_;

128	142

129 // Quickly varying mask updated every block.	143 // Quickly varying mask updated every block.

130 float new_mask_[kNumFreqBins];	144 float new_mask_[kNumFreqBins];

131 // Time smoothed mask.	145 // Time smoothed mask.

132 float time_smooth_mask_[kNumFreqBins];	146 float time_smooth_mask_[kNumFreqBins];

133 // Time and frequency smoothed mask.	147 // Time and frequency smoothed mask.

134 float final_mask_[kNumFreqBins];	148 float final_mask_[kNumFreqBins];

135	149

	150 float target_angle_radians_;

136 // Angles of the interferer scenarios.	151 // Angles of the interferer scenarios.

137 std::vector<float> interf_angles_radians_;	152 std::vector<float> interf_angles_radians_;

	153 // The angle between the target and the interferer scenarios.

	154 const float away_radians_;

138	155

139 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_input_channels_\|.	156 // Array of length \|kNumFreqBins\|, Matrix of size \|1\| x \|num_input_channels_\|.

140 ComplexMatrixF delay_sum_masks_[kNumFreqBins];	157 ComplexMatrixF delay_sum_masks_[kNumFreqBins];

141 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];	158 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

142	159

143 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	160 // Arrays of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

144 // \|num_input_channels_\|.	161 // \|num_input_channels_\|.

145 ComplexMatrixF target_cov_mats_[kNumFreqBins];	162 ComplexMatrixF target_cov_mats_[kNumFreqBins];

146	163 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];

147 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x	164 // Array of length \|kNumFreqBins\|, Matrix of size \|num_input_channels_\| x

148 // \|num_input_channels_\|. ScopedVector has a size equal to the number of	165 // \|num_input_channels_\|. ScopedVector has a size equal to the number of

149 // interferer scenarios.	166 // interferer scenarios.

150 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];	167 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];

151	168

152 // Of length \|kNumFreqBins\|.	169 // Of length \|kNumFreqBins\|.

153 float wave_numbers_[kNumFreqBins];	170 float wave_numbers_[kNumFreqBins];

154	171

155 // Preallocated for ProcessAudioBlock()	172 // Preallocated for ProcessAudioBlock()

156 // Of length \|kNumFreqBins\|.	173 // Of length \|kNumFreqBins\|.

(...skipping 12 matching lines...) Expand all Loading...
169 // Number of blocks after which the data is considered interference if the	186 // Number of blocks after which the data is considered interference if the

170 // mask does not pass \|kMaskSignalThreshold\|.	187 // mask does not pass \|kMaskSignalThreshold\|.

171 size_t hold_target_blocks_;	188 size_t hold_target_blocks_;

172 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.	189 // Number of blocks since the last mask that passed \|kMaskSignalThreshold\|.

173 size_t interference_blocks_count_;	190 size_t interference_blocks_count_;

174 };	191 };

175	192

176 } // namespace webrtc	193 } // namespace webrtc

177	194

178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	195 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

OLD	NEW