Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(263)

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h

Issue 1394103003: Make the nonlinear beamformer steerable (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@highfreq
Patch Set: More windows fun Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
13 13
14 // MSVC++ requires this to be set before any other includes to get M_PI.
15 #define _USE_MATH_DEFINES
16
17 #include <math.h>
14 #include <vector> 18 #include <vector>
15 19
16 #include "webrtc/common_audio/lapped_transform.h" 20 #include "webrtc/common_audio/lapped_transform.h"
17 #include "webrtc/common_audio/channel_buffer.h" 21 #include "webrtc/common_audio/channel_buffer.h"
18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" 22 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"
19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" 23 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
20 #include "webrtc/system_wrappers/include/scoped_vector.h" 24 #include "webrtc/system_wrappers/include/scoped_vector.h"
21 25
22 namespace webrtc { 26 namespace webrtc {
23 27
24 // Enhances sound sources coming directly in front of a uniform linear array 28 // Enhances sound sources coming directly in front of a uniform linear array
25 // and suppresses sound sources coming from all other directions. Operates on 29 // and suppresses sound sources coming from all other directions. Operates on
26 // multichannel signals and produces single-channel output. 30 // multichannel signals and produces single-channel output.
27 // 31 //
28 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear 32 // The implemented nonlinear postfilter algorithm is taken from "A Robust Nonlinear
29 // Beamforming Postprocessor" by Bastiaan Kleijn. 33 // Beamforming Postprocessor" by Bastiaan Kleijn.
30 class NonlinearBeamformer 34 class NonlinearBeamformer
31 : public Beamformer<float>, 35 : public Beamformer<float>,
32 public LappedTransform::Callback { 36 public LappedTransform::Callback {
33 public: 37 public:
34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); 38 static const float kHalfBeamWidthRadians;
39
40 explicit NonlinearBeamformer(
41 const std::vector<Point>& array_geometry,
42 SphericalPointf target_direction =
43 SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
35 44
36 // Sample rate corresponds to the lower band. 45 // Sample rate corresponds to the lower band.
37 // Needs to be called before the NonlinearBeamformer can be used. 46 // Needs to be called before the NonlinearBeamformer can be used.
38 void Initialize(int chunk_size_ms, int sample_rate_hz) override; 47 void Initialize(int chunk_size_ms, int sample_rate_hz) override;
39 48
40 // Process one time-domain chunk of audio. The audio is expected to be split 49 // Process one time-domain chunk of audio. The audio is expected to be split
41 // into frequency bands inside the ChannelBuffer. The number of frames and 50 // into frequency bands inside the ChannelBuffer. The number of frames and
42 // channels must correspond to the constructor parameters. The same 51 // channels must correspond to the constructor parameters. The same
43 // ChannelBuffer can be passed in as |input| and |output|. 52 // ChannelBuffer can be passed in as |input| and |output|.
44 void ProcessChunk(const ChannelBuffer<float>& input, 53 void ProcessChunk(const ChannelBuffer<float>& input,
45 ChannelBuffer<float>* output) override; 54 ChannelBuffer<float>* output) override;
46 55
56 void AimAt(const SphericalPointf& target_direction) override;
57
47 bool IsInBeam(const SphericalPointf& spherical_point) override; 58 bool IsInBeam(const SphericalPointf& spherical_point) override;
48 59
49 // After processing each block |is_target_present_| is set to true if the 60 // After processing each block |is_target_present_| is set to true if the
50 // target signal is present and to false otherwise. This method can be called 61 // target signal is present and to false otherwise. This method can be called
51 // to know if the data is target signal or interference and process it 62 // to know if the data is target signal or interference and process it
52 // accordingly. 63 // accordingly.
53 bool is_target_present() override { return is_target_present_; } 64 bool is_target_present() override { return is_target_present_; }
54 65
55 protected: 66 protected:
56 // Process one frequency-domain block of audio. This is where the fun 67 // Process one frequency-domain block of audio. This is where the fun
57 // happens. Implements LappedTransform::Callback. 68 // happens. Implements LappedTransform::Callback.
58 void ProcessAudioBlock(const complex<float>* const* input, 69 void ProcessAudioBlock(const complex<float>* const* input,
59 int num_input_channels, 70 int num_input_channels,
60 size_t num_freq_bins, 71 size_t num_freq_bins,
61 int num_output_channels, 72 int num_output_channels,
62 complex<float>* const* output) override; 73 complex<float>* const* output) override;
63 74
64 private: 75 private:
76 FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
77 InterfAnglesTakeAmbiguityIntoAccount);
78
65 typedef Matrix<float> MatrixF; 79 typedef Matrix<float> MatrixF;
66 typedef ComplexMatrix<float> ComplexMatrixF; 80 typedef ComplexMatrix<float> ComplexMatrixF;
67 typedef complex<float> complex_f; 81 typedef complex<float> complex_f;
68 82
69 void InitFrequencyCorrectionRanges(); 83 void InitLowFrequencyCorrectionRanges();
84 void InitHighFrequencyCorrectionRanges();
70 void InitInterfAngles(); 85 void InitInterfAngles();
71 void InitDelaySumMasks(); 86 void InitDelaySumMasks();
72 void InitTargetCovMats(); 87 void InitTargetCovMats();
88 void InitDiffuseCovMats();
73 void InitInterfCovMats(); 89 void InitInterfCovMats();
90 void NormalizeCovMats();
74 91
75 // Calculates postfilter masks that minimize the mean squared error of our 92 // Calculates postfilter masks that minimize the mean squared error of our
76 // estimation of the desired signal. 93 // estimation of the desired signal.
77 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, 94 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
78 float rpsiw, 95 float rpsiw,
79 float ratio_rxiw_rxim, 96 float ratio_rxiw_rxim,
80 float rmxi_r); 97 float rmxi_r);
81 98
82 // Prevents the postfilter masks from degenerating too quickly (a cause of 99 // Prevents the postfilter masks from degenerating too quickly (a cause of
83 // musical noise). 100 // musical noise).
(...skipping 25 matching lines...) Expand all
109 // Deals with the fft transform and blocking. 126 // Deals with the fft transform and blocking.
110 size_t chunk_length_; 127 size_t chunk_length_;
111 rtc::scoped_ptr<LappedTransform> lapped_transform_; 128 rtc::scoped_ptr<LappedTransform> lapped_transform_;
112 float window_[kFftSize]; 129 float window_[kFftSize];
113 130
114 // Parameters exposed to the user. 131 // Parameters exposed to the user.
115 const int num_input_channels_; 132 const int num_input_channels_;
116 int sample_rate_hz_; 133 int sample_rate_hz_;
117 134
118 const std::vector<Point> array_geometry_; 135 const std::vector<Point> array_geometry_;
136 // The normal direction of the array if it has one and it is in the xy-plane.
137 const rtc::Maybe<Point> array_normal_;
119 138
120 // Minimum spacing between microphone pairs. 139 // Minimum spacing between microphone pairs.
121 const float min_mic_spacing_; 140 const float min_mic_spacing_;
122 141
123 // Calculated based on user-input and constants in the .cc file. 142 // Calculated based on user-input and constants in the .cc file.
124 size_t low_mean_start_bin_; 143 size_t low_mean_start_bin_;
125 size_t low_mean_end_bin_; 144 size_t low_mean_end_bin_;
126 size_t high_mean_start_bin_; 145 size_t high_mean_start_bin_;
127 size_t high_mean_end_bin_; 146 size_t high_mean_end_bin_;
128 147
129 // Quickly varying mask updated every block. 148 // Quickly varying mask updated every block.
130 float new_mask_[kNumFreqBins]; 149 float new_mask_[kNumFreqBins];
131 // Time smoothed mask. 150 // Time smoothed mask.
132 float time_smooth_mask_[kNumFreqBins]; 151 float time_smooth_mask_[kNumFreqBins];
133 // Time and frequency smoothed mask. 152 // Time and frequency smoothed mask.
134 float final_mask_[kNumFreqBins]; 153 float final_mask_[kNumFreqBins];
135 154
155 float target_angle_radians_;
136 // Angles of the interferer scenarios. 156 // Angles of the interferer scenarios.
137 std::vector<float> interf_angles_radians_; 157 std::vector<float> interf_angles_radians_;
158 // The angle between the target and the interferer scenarios.
159 const float away_radians_;
138 160
139 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. 161 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
140 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; 162 ComplexMatrixF delay_sum_masks_[kNumFreqBins];
141 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; 163 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
142 164
143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 165 // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
144 // |num_input_channels_|. 166 // |num_input_channels_|.
145 ComplexMatrixF target_cov_mats_[kNumFreqBins]; 167 ComplexMatrixF target_cov_mats_[kNumFreqBins];
146 168 ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
147 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x 169 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
148 // |num_input_channels_|. ScopedVector has a size equal to the number of 170 // |num_input_channels_|. ScopedVector has a size equal to the number of
149 // interferer scenarios. 171 // interferer scenarios.
150 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins]; 172 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];
151 173
152 // Of length |kNumFreqBins|. 174 // Of length |kNumFreqBins|.
153 float wave_numbers_[kNumFreqBins]; 175 float wave_numbers_[kNumFreqBins];
154 176
155 // Preallocated for ProcessAudioBlock() 177 // Preallocated for ProcessAudioBlock()
156 // Of length |kNumFreqBins|. 178 // Of length |kNumFreqBins|.
(...skipping 12 matching lines...) Expand all
169 // Number of blocks after which the data is considered interference if the 191 // Number of blocks after which the data is considered interference if the
170 // mask does not pass |kMaskSignalThreshold|. 192 // mask does not pass |kMaskSignalThreshold|.
171 size_t hold_target_blocks_; 193 size_t hold_target_blocks_;
172 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. 194 // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
173 size_t interference_blocks_count_; 195 size_t interference_blocks_count_;
174 }; 196 };
175 197
176 } // namespace webrtc 198 } // namespace webrtc
177 199
178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ 200 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698