Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(487)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1378973003: Implement new version of the NonlinearBeamformer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
13 13
14 #include <vector> 14 #include <vector>
15 15
16 #include "webrtc/common_audio/lapped_transform.h" 16 #include "webrtc/common_audio/lapped_transform.h"
17 #include "webrtc/common_audio/channel_buffer.h" 17 #include "webrtc/common_audio/channel_buffer.h"
18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" 18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"
19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" 19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
20 #include "webrtc/system_wrappers/interface/scoped_vector.h"
20 21
21 namespace webrtc { 22 namespace webrtc {
22 23
23 // Enhances sound sources coming directly in front of a uniform linear array 24 // Enhances sound sources coming directly in front of a uniform linear array
24 // and suppresses sound sources coming from all other directions. Operates on 25 // and suppresses sound sources coming from all other directions. Operates on
25 // multichannel signals and produces single-channel output. 26 // multichannel signals and produces single-channel output.
26 // 27 //
27 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear 28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear
28 // Beamforming Postprocessor" by Bastiaan Kleijn. 29 // Beamforming Postprocessor" by Bastiaan Kleijn.
29 //
30 // TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
31 class NonlinearBeamformer 30 class NonlinearBeamformer
32 : public Beamformer<float>, 31 : public Beamformer<float>,
33 public LappedTransform::Callback { 32 public LappedTransform::Callback {
34 public: 33 public:
35 // At the moment it only accepts uniform linear microphone arrays. Using the
36 // first microphone as a reference position [0, 0, 0] is a natural choice.
37 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); 34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
38 35
39 // Sample rate corresponds to the lower band. 36 // Sample rate corresponds to the lower band.
40 // Needs to be called before the NonlinearBeamformer can be used. 37 // Needs to be called before the NonlinearBeamformer can be used.
41 void Initialize(int chunk_size_ms, int sample_rate_hz) override; 38 void Initialize(int chunk_size_ms, int sample_rate_hz) override;
42 39
43 // Process one time-domain chunk of audio. The audio is expected to be split 40 // Process one time-domain chunk of audio. The audio is expected to be split
44 // into frequency bands inside the ChannelBuffer. The number of frames and 41 // into frequency bands inside the ChannelBuffer. The number of frames and
45 // channels must correspond to the constructor parameters. The same 42 // channels must correspond to the constructor parameters. The same
46 // ChannelBuffer can be passed in as |input| and |output|. 43 // ChannelBuffer can be passed in as |input| and |output|.
(...skipping 15 matching lines...) Expand all
62 int num_input_channels, 59 int num_input_channels,
63 size_t num_freq_bins, 60 size_t num_freq_bins,
64 int num_output_channels, 61 int num_output_channels,
65 complex<float>* const* output) override; 62 complex<float>* const* output) override;
66 63
67 private: 64 private:
68 typedef Matrix<float> MatrixF; 65 typedef Matrix<float> MatrixF;
69 typedef ComplexMatrix<float> ComplexMatrixF; 66 typedef ComplexMatrix<float> ComplexMatrixF;
70 typedef complex<float> complex_f; 67 typedef complex<float> complex_f;
71 68
69 void InitInterfAngles();
72 void InitDelaySumMasks(); 70 void InitDelaySumMasks();
73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. 71 void InitTargetCovMats();
74 void InitInterfCovMats(); 72 void InitInterfCovMats();
75 73
76 // An implementation of equation 18, which calculates postfilter masks that, 74 // Calculates postfilter masks that minimize the mean-square error of our
Andrew MacDonald 2015/10/06 23:54:32 nit: mean squared error
aluebs-webrtc 2015/10/07 22:08:05 Done.
77 // when applied, minimize the mean-square error of our estimation of the 75 // estimation of the desired signal.
78 // desired signal. A sub-task is to calculate lambda, which is solved via
79 // equation 13.
80 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, 76 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
81 float rpsiw, 77 float rpsiw,
82 float ratio_rxiw_rxim, 78 float ratio_rxiw_rxim,
83 float rmxi_r, 79 float rmxi_r);
84 float mask_threshold);
85 80
86 // Prevents the postfilter masks from degenerating too quickly (a cause of 81 // Prevents the postfilter masks from degenerating too quickly (a cause of
87 // musical noise). 82 // musical noise).
88 void ApplyMaskTimeSmoothing(); 83 void ApplyMaskTimeSmoothing();
89 void ApplyMaskFrequencySmoothing(); 84 void ApplyMaskFrequencySmoothing();
90 85
91 // The postfilter masks are unreliable at low frequencies. Calculates a better 86 // The postfilter masks are unreliable at low frequencies. Calculates a better
92 // mask by averaging mid-low frequency values. 87 // mask by averaging mid-low frequency values.
93 void ApplyLowFrequencyCorrection(); 88 void ApplyLowFrequencyCorrection();
94 89
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 size_t high_mean_start_bin_; 122 size_t high_mean_start_bin_;
128 size_t high_mean_end_bin_; 123 size_t high_mean_end_bin_;
129 124
130 // Quickly varying mask updated every block. 125 // Quickly varying mask updated every block.
131 float new_mask_[kNumFreqBins]; 126 float new_mask_[kNumFreqBins];
132 // Time smoothed mask. 127 // Time smoothed mask.
133 float time_smooth_mask_[kNumFreqBins]; 128 float time_smooth_mask_[kNumFreqBins];
134 // Time and frequency smoothed mask. 129 // Time and frequency smoothed mask.
135 float final_mask_[kNumFreqBins]; 130 float final_mask_[kNumFreqBins];
136 131
132 // Angles of the interferer scenarios.
133 std::vector<float> interf_angles_radians_;
134
137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. 135 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
138 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; 136 ComplexMatrixF delay_sum_masks_[kNumFreqBins];
139 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; 137 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
140 138
141 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 139 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
142 // |num_input_channels_|. 140 // |num_input_channels_|.
143 ComplexMatrixF target_cov_mats_[kNumFreqBins]; 141 ComplexMatrixF target_cov_mats_[kNumFreqBins];
144 142
145 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
146 // |num_input_channels_|. 144 // |num_input_channels_|. ScopedVector has a size equal to the number of
147 ComplexMatrixF interf_cov_mats_[kNumFreqBins]; 145 // interferer scenarios.
148 ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; 146 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];
149 147
150 // Of length |kNumFreqBins|. 148 // Of length |kNumFreqBins|.
151 float mask_thresholds_[kNumFreqBins];
152 float wave_numbers_[kNumFreqBins]; 149 float wave_numbers_[kNumFreqBins];
153 150
154 // Preallocated for ProcessAudioBlock() 151 // Preallocated for ProcessAudioBlock()
155 // Of length |kNumFreqBins|. 152 // Of length |kNumFreqBins|.
156 float rxiws_[kNumFreqBins]; 153 float rxiws_[kNumFreqBins];
157 float rpsiws_[kNumFreqBins]; 154 // The vector has a size equal to the number of interferer scenarios.
158 float reflected_rpsiws_[kNumFreqBins]; 155 std::vector<float> rpsiws_[kNumFreqBins];
159 156
160 // The microphone normalization factor. 157 // The microphone normalization factor.
161 ComplexMatrixF eig_m_; 158 ComplexMatrixF eig_m_;
162 159
163 // For processing the high-frequency input signal. 160 // For processing the high-frequency input signal.
164 float high_pass_postfilter_mask_; 161 float high_pass_postfilter_mask_;
165 162
166 // True when the target signal is present. 163 // True when the target signal is present.
167 bool is_target_present_; 164 bool is_target_present_;
168 // Number of blocks after which the data is considered interference if the 165 // Number of blocks after which the data is considered interference if the
169 // mask does not pass |kMaskSignalThreshold|. 166 // mask does not pass |kMaskSignalThreshold|.
170 size_t hold_target_blocks_; 167 size_t hold_target_blocks_;
171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. 168 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
172 size_t interference_blocks_count_; 169 size_t interference_blocks_count_;
173 }; 170 };
174 171
175 } // namespace webrtc 172 } // namespace webrtc
176 173
177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 174 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698