Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq
Patch Set: Generalize interferer scenarios Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 13 matching lines...) Expand all
24 // Enhances sound sources coming directly in front of a uniform linear array 24 // Enhances sound sources coming directly in front of a uniform linear array
25 // and suppresses sound sources coming from all other directions. Operates on 25 // and suppresses sound sources coming from all other directions. Operates on
26 // multichannel signals and produces single-channel output. 26 // multichannel signals and produces single-channel output.
27 // 27 //
28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear 28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear
29 // Beamforming Postprocessor" by Bastiaan Kleijn. 29 // Beamforming Postprocessor" by Bastiaan Kleijn.
30 class NonlinearBeamformer 30 class NonlinearBeamformer
31 : public Beamformer<float>, 31 : public Beamformer<float>,
32 public LappedTransform::Callback { 32 public LappedTransform::Callback {
33 public: 33 public:
34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); 34 explicit NonlinearBeamformer(
35 const std::vector<Point>& array_geometry,
36 SphericalPointf target_direction = SphericalPointf(M_PI / 2.f, 0.f, 1.f));
35 37
36 // Sample rate corresponds to the lower band. 38 // Sample rate corresponds to the lower band.
37 // Needs to be called before the NonlinearBeamformer can be used. 39 // Needs to be called before the NonlinearBeamformer can be used.
38 void Initialize(int chunk_size_ms, int sample_rate_hz) override; 40 void Initialize(int chunk_size_ms, int sample_rate_hz) override;
39 41
40 // Process one time-domain chunk of audio. The audio is expected to be split 42 // Process one time-domain chunk of audio. The audio is expected to be split
41 // into frequency bands inside the ChannelBuffer. The number of frames and 43 // into frequency bands inside the ChannelBuffer. The number of frames and
42 // channels must correspond to the constructor parameters. The same 44 // channels must correspond to the constructor parameters. The same
43 // ChannelBuffer can be passed in as |input| and |output|. 45 // ChannelBuffer can be passed in as |input| and |output|.
44 void ProcessChunk(const ChannelBuffer<float>& input, 46 void ProcessChunk(const ChannelBuffer<float>& input,
45 ChannelBuffer<float>* output) override; 47 ChannelBuffer<float>* output) override;
46 48
49 void AimAt(const SphericalPointf& target_direction) override;
50
47 bool IsInBeam(const SphericalPointf& spherical_point) override; 51 bool IsInBeam(const SphericalPointf& spherical_point) override;
48 52
49 // After processing each block |is_target_present_| is set to true if the 53 // After processing each block |is_target_present_| is set to true if the
50 // target signal is present and to false otherwise. This method can be called 54 // target signal is present and to false otherwise. This method can be called
51 // to know if the data is target signal or interference and process it 55 // to know if the data is target signal or interference and process it
52 // accordingly. 56 // accordingly.
53 bool is_target_present() override { return is_target_present_; } 57 bool is_target_present() override { return is_target_present_; }
54 58
55 protected: 59 protected:
56 // Process one frequency-domain block of audio. This is where the fun 60 // Process one frequency-domain block of audio. This is where the fun
57 // happens. Implements LappedTransform::Callback. 61 // happens. Implements LappedTransform::Callback.
58 void ProcessAudioBlock(const complex<float>* const* input, 62 void ProcessAudioBlock(const complex<float>* const* input,
59 int num_input_channels, 63 int num_input_channels,
60 size_t num_freq_bins, 64 size_t num_freq_bins,
61 int num_output_channels, 65 int num_output_channels,
62 complex<float>* const* output) override; 66 complex<float>* const* output) override;
63 67
64 private: 68 private:
65 typedef Matrix<float> MatrixF; 69 typedef Matrix<float> MatrixF;
66 typedef ComplexMatrix<float> ComplexMatrixF; 70 typedef ComplexMatrix<float> ComplexMatrixF;
67 typedef complex<float> complex_f; 71 typedef complex<float> complex_f;
68 72
69 void InitFrequencyCorrectionRanges(); 73 void InitLowFrequencyCorrectionRanges();
74 void InitHighFrequencyCorrectionRanges();
70 void InitInterfAngles(); 75 void InitInterfAngles();
71 void InitDelaySumMasks(); 76 void InitDelaySumMasks();
72 void InitTargetCovMats(); 77 void InitTargetCovMats();
78 void InitDiffuseCovMats();
73 void InitInterfCovMats(); 79 void InitInterfCovMats();
80 void NormalizeCovMats();
74 81
75 // Calculates postfilter masks that minimize the mean squared error of our 82 // Calculates postfilter masks that minimize the mean squared error of our
76 // estimation of the desired signal. 83 // estimation of the desired signal.
77 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, 84 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
78 float rpsiw, 85 float rpsiw,
79 float ratio_rxiw_rxim, 86 float ratio_rxiw_rxim,
80 float rmxi_r); 87 float rmxi_r);
81 88
82 // Prevents the postfilter masks from degenerating too quickly (a cause of 89 // Prevents the postfilter masks from degenerating too quickly (a cause of
83 // musical noise). 90 // musical noise).
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
126 size_t high_mean_start_bin_; 133 size_t high_mean_start_bin_;
127 size_t high_mean_end_bin_; 134 size_t high_mean_end_bin_;
128 135
129 // Quickly varying mask updated every block. 136 // Quickly varying mask updated every block.
130 float new_mask_[kNumFreqBins]; 137 float new_mask_[kNumFreqBins];
131 // Time smoothed mask. 138 // Time smoothed mask.
132 float time_smooth_mask_[kNumFreqBins]; 139 float time_smooth_mask_[kNumFreqBins];
133 // Time and frequency smoothed mask. 140 // Time and frequency smoothed mask.
134 float final_mask_[kNumFreqBins]; 141 float final_mask_[kNumFreqBins];
135 142
143 float target_angle_radians_;
136 // Angles of the interferer scenarios. 144 // Angles of the interferer scenarios.
137 std::vector<float> interf_angles_radians_; 145 std::vector<float> interf_angles_radians_;
138 146
139 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. 147 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
140 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; 148 ComplexMatrixF delay_sum_masks_[kNumFreqBins];
141 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; 149 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
142 150
143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 151 // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
144 // |num_input_channels_|. 152 // |num_input_channels_|.
145 ComplexMatrixF target_cov_mats_[kNumFreqBins]; 153 ComplexMatrixF target_cov_mats_[kNumFreqBins];
146 154 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
147 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 155 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
148 // |num_input_channels_|. ScopedVector has a size equal to the number of 156 // |num_input_channels_|. ScopedVector has a size equal to the number of
149 // interferer scenarios. 157 // interferer scenarios.
150 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins]; 158 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];
151 159
152 // Of length |kNumFreqBins|. 160 // Of length |kNumFreqBins|.
153 float wave_numbers_[kNumFreqBins]; 161 float wave_numbers_[kNumFreqBins];
154 162
155 // Preallocated for ProcessAudioBlock() 163 // Preallocated for ProcessAudioBlock()
156 // Of length |kNumFreqBins|. 164 // Of length |kNumFreqBins|.
(...skipping 12 matching lines...) Expand all
169 // Number of blocks after which the data is considered interference if the 177 // Number of blocks after which the data is considered interference if the
170 // mask does not pass |kMaskSignalThreshold|. 178 // mask does not pass |kMaskSignalThreshold|.
171 size_t hold_target_blocks_; 179 size_t hold_target_blocks_;
172 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. 180 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
173 size_t interference_blocks_count_; 181 size_t interference_blocks_count_;
174 }; 182 };
175 183
176 } // namespace webrtc 184 } // namespace webrtc
177 185
178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 186 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698