OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
13 | 13 |
14 #include <vector> | 14 #include <vector> |
15 | 15 |
16 #include "webrtc/common_audio/lapped_transform.h" | 16 #include "webrtc/common_audio/lapped_transform.h" |
17 #include "webrtc/common_audio/channel_buffer.h" | 17 #include "webrtc/common_audio/channel_buffer.h" |
18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" | 18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" |
19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" | 19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" |
| 20 #include "webrtc/system_wrappers/interface/scoped_vector.h" |
20 | 21 |
21 namespace webrtc { | 22 namespace webrtc { |
22 | 23 |
23 // Enhances sound sources coming directly in front of a uniform linear array | 24 // Enhances sound sources coming directly in front of a uniform linear array |
24 // and suppresses sound sources coming from all other directions. Operates on | 25 // and suppresses sound sources coming from all other directions. Operates on |
25 // multichannel signals and produces single-channel output. | 26 // multichannel signals and produces single-channel output. |
26 // | 27 // |
27 // The implemented nonlinear postfilter algorithm is taken from "A Robust | 28 // The implemented nonlinear postfilter algorithm is taken from "A Robust |
28 // Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn. | 29 // Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn. |
29 // | |
30 // TODO(aluebs): Target angle assumed to be 0. Parameterize target angle. | |
31 class NonlinearBeamformer | 30 class NonlinearBeamformer |
32 : public Beamformer<float>, | 31 : public Beamformer<float>, |
33 public LappedTransform::Callback { | 32 public LappedTransform::Callback { |
34 public: | 33 public: |
35 // At the moment it only accepts uniform linear microphone arrays. Using the | |
36 // first microphone as a reference position [0, 0, 0] is a natural choice. | |
37 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); | 34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); |
38 | 35 |
39 // Sample rate corresponds to the lower band. | 36 // Sample rate corresponds to the lower band. |
40 // Needs to be called before the NonlinearBeamformer can be used. | 37 // Needs to be called before the NonlinearBeamformer can be used. |
41 void Initialize(int chunk_size_ms, int sample_rate_hz) override; | 38 void Initialize(int chunk_size_ms, int sample_rate_hz) override; |
42 | 39 |
43 // Process one time-domain chunk of audio. The audio is expected to be split | 40 // Process one time-domain chunk of audio. The audio is expected to be split |
44 // into frequency bands inside the ChannelBuffer. The number of frames and | 41 // into frequency bands inside the ChannelBuffer. The number of frames and |
45 // channels must correspond to the constructor parameters. The same | 42 // channels must correspond to the constructor parameters. The same |
46 // ChannelBuffer can be passed in as |input| and |output|. | 43 // ChannelBuffer can be passed in as |input| and |output|. |
(...skipping 15 matching lines...) |
62 int num_input_channels, | 59 int num_input_channels, |
63 size_t num_freq_bins, | 60 size_t num_freq_bins, |
64 int num_output_channels, | 61 int num_output_channels, |
65 complex<float>* const* output) override; | 62 complex<float>* const* output) override; |
66 | 63 |
67 private: | 64 private: |
68 typedef Matrix<float> MatrixF; | 65 typedef Matrix<float> MatrixF; |
69 typedef ComplexMatrix<float> ComplexMatrixF; | 66 typedef ComplexMatrix<float> ComplexMatrixF; |
70 typedef complex<float> complex_f; | 67 typedef complex<float> complex_f; |
71 | 68 |
| 69 void InitInterfAngles(); |
72 void InitDelaySumMasks(); | 70 void InitDelaySumMasks(); |
73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. | 71 void InitTargetCovMats(); |
74 void InitInterfCovMats(); | 72 void InitInterfCovMats(); |
75 | 73 |
76 // An implementation of equation 18, which calculates postfilter masks that, | 74 // Calculates postfilter masks that minimize the mean squared error of our |
77 // when applied, minimize the mean-square error of our estimation of the | 75 // estimation of the desired signal. |
78 // desired signal. A sub-task is to calculate lambda, which is solved via | |
79 // equation 13. | |
80 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, | 76 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, |
81 float rpsiw, | 77 float rpsiw, |
82 float ratio_rxiw_rxim, | 78 float ratio_rxiw_rxim, |
83 float rmxi_r, | 79 float rmxi_r); |
84 float mask_threshold); | |
85 | 80 |
86 // Prevents the postfilter masks from degenerating too quickly (a cause of | 81 // Prevents the postfilter masks from degenerating too quickly (a cause of |
87 // musical noise). | 82 // musical noise). |
88 void ApplyMaskTimeSmoothing(); | 83 void ApplyMaskTimeSmoothing(); |
89 void ApplyMaskFrequencySmoothing(); | 84 void ApplyMaskFrequencySmoothing(); |
90 | 85 |
91 // The postfilter masks are unreliable at low frequencies. Calculates a better | 86 // The postfilter masks are unreliable at low frequencies. Calculates a better |
92 // mask by averaging mid-low frequency values. | 87 // mask by averaging mid-low frequency values. |
93 void ApplyLowFrequencyCorrection(); | 88 void ApplyLowFrequencyCorrection(); |
94 | 89 |
(...skipping 32 matching lines...) |
127 size_t high_mean_start_bin_; | 122 size_t high_mean_start_bin_; |
128 size_t high_mean_end_bin_; | 123 size_t high_mean_end_bin_; |
129 | 124 |
130 // Quickly varying mask updated every block. | 125 // Quickly varying mask updated every block. |
131 float new_mask_[kNumFreqBins]; | 126 float new_mask_[kNumFreqBins]; |
132 // Time smoothed mask. | 127 // Time smoothed mask. |
133 float time_smooth_mask_[kNumFreqBins]; | 128 float time_smooth_mask_[kNumFreqBins]; |
134 // Time and frequency smoothed mask. | 129 // Time and frequency smoothed mask. |
135 float final_mask_[kNumFreqBins]; | 130 float final_mask_[kNumFreqBins]; |
136 | 131 |
| 132 // Angles of the interferer scenarios. |
| 133 std::vector<float> interf_angles_radians_; |
| 134 |
137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. | 135 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. |
138 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; | 136 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; |
139 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; | 137 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; |
140 | 138 |
141 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x | 139 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
142 // |num_input_channels_|. | 140 // |num_input_channels_|. |
143 ComplexMatrixF target_cov_mats_[kNumFreqBins]; | 141 ComplexMatrixF target_cov_mats_[kNumFreqBins]; |
144 | 142 |
145 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x | 143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
146 // |num_input_channels_|. | 144 // |num_input_channels_|. ScopedVector has a size equal to the number of |
147 ComplexMatrixF interf_cov_mats_[kNumFreqBins]; | 145 // interferer scenarios. |
148 ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; | 146 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins]; |
149 | 147 |
150 // Of length |kNumFreqBins|. | 148 // Of length |kNumFreqBins|. |
151 float mask_thresholds_[kNumFreqBins]; | |
152 float wave_numbers_[kNumFreqBins]; | 149 float wave_numbers_[kNumFreqBins]; |
153 | 150 |
154 // Preallocated for ProcessAudioBlock() | 151 // Preallocated for ProcessAudioBlock() |
155 // Of length |kNumFreqBins|. | 152 // Of length |kNumFreqBins|. |
156 float rxiws_[kNumFreqBins]; | 153 float rxiws_[kNumFreqBins]; |
157 float rpsiws_[kNumFreqBins]; | 154 // The vector has a size equal to the number of interferer scenarios. |
158 float reflected_rpsiws_[kNumFreqBins]; | 155 std::vector<float> rpsiws_[kNumFreqBins]; |
159 | 156 |
160 // The microphone normalization factor. | 157 // The microphone normalization factor. |
161 ComplexMatrixF eig_m_; | 158 ComplexMatrixF eig_m_; |
162 | 159 |
163 // For processing the high-frequency input signal. | 160 // For processing the high-frequency input signal. |
164 float high_pass_postfilter_mask_; | 161 float high_pass_postfilter_mask_; |
165 | 162 |
166 // True when the target signal is present. | 163 // True when the target signal is present. |
167 bool is_target_present_; | 164 bool is_target_present_; |
168 // Number of blocks after which the data is considered interference if the | 165 // Number of blocks after which the data is considered interference if the |
169 // mask does not pass |kMaskSignalThreshold|. | 166 // mask does not pass |kMaskSignalThreshold|. |
170 size_t hold_target_blocks_; | 167 size_t hold_target_blocks_; |
171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. | 168 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. |
172 size_t interference_blocks_count_; | 169 size_t interference_blocks_count_; |
173 }; | 170 }; |
174 | 171 |
175 } // namespace webrtc | 172 } // namespace webrtc |
176 | 173 |
177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 174 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
OLD | NEW |