Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1268)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq
Patch Set: Formatting Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 13 matching lines...) Expand all
24 // Enhances sound sources coming directly in front of a uniform linear array 24 // Enhances sound sources coming directly in front of a uniform linear array
25 // and suppresses sound sources coming from all other directions. Operates on 25 // and suppresses sound sources coming from all other directions. Operates on
26 // multichannel signals and produces single-channel output. 26 // multichannel signals and produces single-channel output.
27 // 27 //
28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear 28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear
29 // Beamforming Postprocessor" by Bastiaan Kleijn. 29 // Beamforming Postprocessor" by Bastiaan Kleijn.
30 class NonlinearBeamformer 30 class NonlinearBeamformer
31 : public Beamformer<float>, 31 : public Beamformer<float>,
32 public LappedTransform::Callback { 32 public LappedTransform::Callback {
33 public: 33 public:
34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); 34 static const float kHalfBeamWidthRadians;
35
36 explicit NonlinearBeamformer(
37 const std::vector<Point>& array_geometry,
38 SphericalPointf target_direction = SphericalPointf(M_PI / 2.f, 0.f, 1.f));
35 39
36 // Sample rate corresponds to the lower band. 40 // Sample rate corresponds to the lower band.
37 // Needs to be called before the NonlinearBeamformer can be used. 41 // Needs to be called before the NonlinearBeamformer can be used.
38 void Initialize(int chunk_size_ms, int sample_rate_hz) override; 42 void Initialize(int chunk_size_ms, int sample_rate_hz) override;
39 43
40 // Process one time-domain chunk of audio. The audio is expected to be split 44 // Process one time-domain chunk of audio. The audio is expected to be split
41 // into frequency bands inside the ChannelBuffer. The number of frames and 45 // into frequency bands inside the ChannelBuffer. The number of frames and
42 // channels must correspond to the constructor parameters. The same 46 // channels must correspond to the constructor parameters. The same
43 // ChannelBuffer can be passed in as |input| and |output|. 47 // ChannelBuffer can be passed in as |input| and |output|.
44 void ProcessChunk(const ChannelBuffer<float>& input, 48 void ProcessChunk(const ChannelBuffer<float>& input,
45 ChannelBuffer<float>* output) override; 49 ChannelBuffer<float>* output) override;
46 50
51 void AimAt(const SphericalPointf& target_direction) override;
52
47 bool IsInBeam(const SphericalPointf& spherical_point) override; 53 bool IsInBeam(const SphericalPointf& spherical_point) override;
48 54
49 // After processing each block |is_target_present_| is set to true if the 55 // After processing each block |is_target_present_| is set to true if the
50 // target signal is present and to false otherwise. This method can be called 56 // target signal is present and to false otherwise. This method can be called
51 // to know if the data is target signal or interference and process it 57 // to know if the data is target signal or interference and process it
52 // accordingly. 58 // accordingly.
53 bool is_target_present() override { return is_target_present_; } 59 bool is_target_present() override { return is_target_present_; }
54 60
55 protected: 61 protected:
56 // Process one frequency-domain block of audio. This is where the fun 62 // Process one frequency-domain block of audio. This is where the fun
57 // happens. Implements LappedTransform::Callback. 63 // happens. Implements LappedTransform::Callback.
58 void ProcessAudioBlock(const complex<float>* const* input, 64 void ProcessAudioBlock(const complex<float>* const* input,
59 int num_input_channels, 65 int num_input_channels,
60 size_t num_freq_bins, 66 size_t num_freq_bins,
61 int num_output_channels, 67 int num_output_channels,
62 complex<float>* const* output) override; 68 complex<float>* const* output) override;
63 69
64 private: 70 private:
71 FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
72 InterfAnglesTakeAmbiguityIntoAccount);
73
65 typedef Matrix<float> MatrixF; 74 typedef Matrix<float> MatrixF;
66 typedef ComplexMatrix<float> ComplexMatrixF; 75 typedef ComplexMatrix<float> ComplexMatrixF;
67 typedef complex<float> complex_f; 76 typedef complex<float> complex_f;
68 77
69 void InitFrequencyCorrectionRanges(); 78 void InitLowFrequencyCorrectionRanges();
79 void InitHighFrequencyCorrectionRanges();
70 void InitInterfAngles(); 80 void InitInterfAngles();
71 void InitDelaySumMasks(); 81 void InitDelaySumMasks();
72 void InitTargetCovMats(); 82 void InitTargetCovMats();
83 void InitDiffuseCovMats();
73 void InitInterfCovMats(); 84 void InitInterfCovMats();
85 void NormalizeCovMats();
74 86
75 // Calculates postfilter masks that minimize the mean squared error of our 87 // Calculates postfilter masks that minimize the mean squared error of our
76 // estimation of the desired signal. 88 // estimation of the desired signal.
77 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, 89 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
78 float rpsiw, 90 float rpsiw,
79 float ratio_rxiw_rxim, 91 float ratio_rxiw_rxim,
80 float rmxi_r); 92 float rmxi_r);
81 93
82 // Prevents the postfilter masks from degenerating too quickly (a cause of 94 // Prevents the postfilter masks from degenerating too quickly (a cause of
83 // musical noise). 95 // musical noise).
(...skipping 25 matching lines...) Expand all
109 // Deals with the fft transform and blocking. 121 // Deals with the fft transform and blocking.
110 size_t chunk_length_; 122 size_t chunk_length_;
111 rtc::scoped_ptr<LappedTransform> lapped_transform_; 123 rtc::scoped_ptr<LappedTransform> lapped_transform_;
112 float window_[kFftSize]; 124 float window_[kFftSize];
113 125
114 // Parameters exposed to the user. 126 // Parameters exposed to the user.
115 const int num_input_channels_; 127 const int num_input_channels_;
116 int sample_rate_hz_; 128 int sample_rate_hz_;
117 129
118 const std::vector<Point> array_geometry_; 130 const std::vector<Point> array_geometry_;
131 // The normal direction of the array in the xy-plane. It is set to null Point
Andrew MacDonald 2015/10/28 01:57:57 origin or zero
aluebs-webrtc 2015/10/29 00:34:21 Not necessary anymore, since using Maybe.
132 // if the array has no normal, or the normal is not in the xy-plane.
133 const Point array_normal_;
119 134
120 // Minimum spacing between microphone pairs. 135 // Minimum spacing between microphone pairs.
121 const float min_mic_spacing_; 136 const float min_mic_spacing_;
122 137
123 // Calculated based on user-input and constants in the .cc file. 138 // Calculated based on user-input and constants in the .cc file.
124 size_t low_mean_start_bin_; 139 size_t low_mean_start_bin_;
125 size_t low_mean_end_bin_; 140 size_t low_mean_end_bin_;
126 size_t high_mean_start_bin_; 141 size_t high_mean_start_bin_;
127 size_t high_mean_end_bin_; 142 size_t high_mean_end_bin_;
128 143
129 // Quickly varying mask updated every block. 144 // Quickly varying mask updated every block.
130 float new_mask_[kNumFreqBins]; 145 float new_mask_[kNumFreqBins];
131 // Time smoothed mask. 146 // Time smoothed mask.
132 float time_smooth_mask_[kNumFreqBins]; 147 float time_smooth_mask_[kNumFreqBins];
133 // Time and frequency smoothed mask. 148 // Time and frequency smoothed mask.
134 float final_mask_[kNumFreqBins]; 149 float final_mask_[kNumFreqBins];
135 150
151 float target_angle_radians_;
136 // Angles of the interferer scenarios. 152 // Angles of the interferer scenarios.
137 std::vector<float> interf_angles_radians_; 153 std::vector<float> interf_angles_radians_;
154 // The angle between the target and the interferer scenarios.
155 const float away_radians_;
138 156
139 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. 157 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
140 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; 158 ComplexMatrixF delay_sum_masks_[kNumFreqBins];
141 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; 159 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
142 160
143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 161 // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
144 // |num_input_channels_|. 162 // |num_input_channels_|.
145 ComplexMatrixF target_cov_mats_[kNumFreqBins]; 163 ComplexMatrixF target_cov_mats_[kNumFreqBins];
146 164 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
147 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 165 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
148 // |num_input_channels_|. ScopedVector has a size equal to the number of 166 // |num_input_channels_|. ScopedVector has a size equal to the number of
149 // interferer scenarios. 167 // interferer scenarios.
150 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins]; 168 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];
151 169
152 // Of length |kNumFreqBins|. 170 // Of length |kNumFreqBins|.
153 float wave_numbers_[kNumFreqBins]; 171 float wave_numbers_[kNumFreqBins];
154 172
155 // Preallocated for ProcessAudioBlock() 173 // Preallocated for ProcessAudioBlock()
156 // Of length |kNumFreqBins|. 174 // Of length |kNumFreqBins|.
(...skipping 12 matching lines...) Expand all
169 // Number of blocks after which the data is considered interference if the 187 // Number of blocks after which the data is considered interference if the
170 // mask does not pass |kMaskSignalThreshold|. 188 // mask does not pass |kMaskSignalThreshold|.
171 size_t hold_target_blocks_; 189 size_t hold_target_blocks_;
172 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. 190 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
173 size_t interference_blocks_count_; 191 size_t interference_blocks_count_;
174 }; 192 };
175 193
176 } // namespace webrtc 194 } // namespace webrtc
177 195
178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 196 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698