webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h - Issue 1378973003: Implement new version of the NonlinearBeamformer

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1378973003: Implement new version of the NonlinearBeamformer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Fix float constant Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/beamformer/covariance_matrix_generator_unittest.cc ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

13	13

14 #include <vector>	14 #include <vector>

15	15

16 #include "webrtc/common_audio/lapped_transform.h"	16 #include "webrtc/common_audio/lapped_transform.h"

17 #include "webrtc/common_audio/channel_buffer.h"	17 #include "webrtc/common_audio/channel_buffer.h"

18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"	18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"

19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"	19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

	20 #include "webrtc/system_wrappers/interface/scoped_vector.h"

20	21

21 namespace webrtc {	22 namespace webrtc {

22	23

23 // Enhances sound sources coming directly in front of a uniform linear array	24 // Enhances sound sources coming directly in front of a uniform linear array

24 // and suppresses sound sources coming from all other directions. Operates on	25 // and suppresses sound sources coming from all other directions. Operates on

25 // multichannel signals and produces single-channel output.	26 // multichannel signals and produces single-channel output.

26 //	27 //

27 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear	28 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear

28 // Beamforming Postprocessor" by Bastiaan Kleijn.	29 // Beamforming Postprocessor" by Bastiaan Kleijn.

29 //

30 // TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.

31 class NonlinearBeamformer	30 class NonlinearBeamformer

32 : public Beamformer<float>,	31 : public Beamformer<float>,

33 public LappedTransform::Callback {	32 public LappedTransform::Callback {

34 public:	33 public:

35 // At the moment it only accepts uniform linear microphone arrays. Using the

36 // first microphone as a reference position [0, 0, 0] is a natural choice.

37 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);	34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);

38	35

39 // Sample rate corresponds to the lower band.	36 // Sample rate corresponds to the lower band.

40 // Needs to be called before the NonlinearBeamformer can be used.	37 // Needs to be called before the NonlinearBeamformer can be used.

41 void Initialize(int chunk_size_ms, int sample_rate_hz) override;	38 void Initialize(int chunk_size_ms, int sample_rate_hz) override;

42	39

43 // Process one time-domain chunk of audio. The audio is expected to be split	40 // Process one time-domain chunk of audio. The audio is expected to be split

44 // into frequency bands inside the ChannelBuffer. The number of frames and	41 // into frequency bands inside the ChannelBuffer. The number of frames and

45 // channels must correspond to the constructor parameters. The same	42 // channels must correspond to the constructor parameters. The same

46 // ChannelBuffer can be passed in as |input| and |output|.	43 // ChannelBuffer can be passed in as |input| and |output|.

(...skipping 15 matching lines...) Expand all Loading...
62 int num_input_channels,	59 int num_input_channels,

63 size_t num_freq_bins,	60 size_t num_freq_bins,

64 int num_output_channels,	61 int num_output_channels,

65 complex<float>* const* output) override;	62 complex<float>* const* output) override;

66	63

67 private:	64 private:

68 typedef Matrix<float> MatrixF;	65 typedef Matrix<float> MatrixF;

69 typedef ComplexMatrix<float> ComplexMatrixF;	66 typedef ComplexMatrix<float> ComplexMatrixF;

70 typedef complex<float> complex_f;	67 typedef complex<float> complex_f;

71	68

	69 void InitInterfAngles();

72 void InitDelaySumMasks();	70 void InitDelaySumMasks();

73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle.	71 void InitTargetCovMats();

74 void InitInterfCovMats();	72 void InitInterfCovMats();

75	73

76 // An implementation of equation 18, which calculates postfilter masks that,	74 // Calculates postfilter masks that minimize the mean squared error of our

77 // when applied, minimize the mean-square error of our estimation of the	75 // estimation of the desired signal.

78 // desired signal. A sub-task is to calculate lambda, which is solved via

79 // equation 13.

80 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,	76 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,

81 float rpsiw,	77 float rpsiw,

82 float ratio_rxiw_rxim,	78 float ratio_rxiw_rxim,

83 float rmxi_r,	79 float rmxi_r);

84 float mask_threshold);

85	80

86 // Prevents the postfilter masks from degenerating too quickly (a cause of	81 // Prevents the postfilter masks from degenerating too quickly (a cause of

87 // musical noise).	82 // musical noise).

88 void ApplyMaskTimeSmoothing();	83 void ApplyMaskTimeSmoothing();

89 void ApplyMaskFrequencySmoothing();	84 void ApplyMaskFrequencySmoothing();

90	85

91 // The postfilter masks are unreliable at low frequencies. Calculates a better	86 // The postfilter masks are unreliable at low frequencies. Calculates a better

92 // mask by averaging mid-low frequency values.	87 // mask by averaging mid-low frequency values.

93 void ApplyLowFrequencyCorrection();	88 void ApplyLowFrequencyCorrection();

94	89

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
127 size_t high_mean_start_bin_;	122 size_t high_mean_start_bin_;

128 size_t high_mean_end_bin_;	123 size_t high_mean_end_bin_;

129	124

130 // Quickly varying mask updated every block.	125 // Quickly varying mask updated every block.

131 float new_mask_[kNumFreqBins];	126 float new_mask_[kNumFreqBins];

132 // Time smoothed mask.	127 // Time smoothed mask.

133 float time_smooth_mask_[kNumFreqBins];	128 float time_smooth_mask_[kNumFreqBins];

134 // Time and frequency smoothed mask.	129 // Time and frequency smoothed mask.

135 float final_mask_[kNumFreqBins];	130 float final_mask_[kNumFreqBins];

136	131

	132 // Angles of the interferer scenarios.

	133 std::vector<float> interf_angles_radians_;

	134

137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.	135 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.

138 ComplexMatrixF delay_sum_masks_[kNumFreqBins];	136 ComplexMatrixF delay_sum_masks_[kNumFreqBins];

139 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];	137 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

140	138

141 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x	139 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x

142 // |num_input_channels_|.	140 // |num_input_channels_|.

143 ComplexMatrixF target_cov_mats_[kNumFreqBins];	141 ComplexMatrixF target_cov_mats_[kNumFreqBins];

144	142

145 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x	143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x

146 // |num_input_channels_|.	144 // |num_input_channels_|. ScopedVector has a size equal to the number of

147 ComplexMatrixF interf_cov_mats_[kNumFreqBins];	145 // interferer scenarios.

148 ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];	146 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];

149	147

150 // Of length |kNumFreqBins|.	148 // Of length |kNumFreqBins|.

151 float mask_thresholds_[kNumFreqBins];

152 float wave_numbers_[kNumFreqBins];	149 float wave_numbers_[kNumFreqBins];

153	150

154 // Preallocated for ProcessAudioBlock()	151 // Preallocated for ProcessAudioBlock()

155 // Of length |kNumFreqBins|.	152 // Of length |kNumFreqBins|.

156 float rxiws_[kNumFreqBins];	153 float rxiws_[kNumFreqBins];

157 float rpsiws_[kNumFreqBins];	154 // The vector has a size equal to the number of interferer scenarios.

158 float reflected_rpsiws_[kNumFreqBins];	155 std::vector<float> rpsiws_[kNumFreqBins];

159	156

160 // The microphone normalization factor.	157 // The microphone normalization factor.

161 ComplexMatrixF eig_m_;	158 ComplexMatrixF eig_m_;

162	159

163 // For processing the high-frequency input signal.	160 // For processing the high-frequency input signal.

164 float high_pass_postfilter_mask_;	161 float high_pass_postfilter_mask_;

165	162

166 // True when the target signal is present.	163 // True when the target signal is present.

167 bool is_target_present_;	164 bool is_target_present_;

168 // Number of blocks after which the data is considered interference if the	165 // Number of blocks after which the data is considered interference if the

169 // mask does not pass |kMaskSignalThreshold|.	166 // mask does not pass |kMaskSignalThreshold|.

170 size_t hold_target_blocks_;	167 size_t hold_target_blocks_;

171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.	168 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.

172 size_t interference_blocks_count_;	169 size_t interference_blocks_count_;

173 };	170 };

174	171

175 } // namespace webrtc	172 } // namespace webrtc

176	173

177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_	174 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

OLD	NEW