OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
52 // After processing each block |is_target_present_| is set to true if the | 52 // After processing each block |is_target_present_| is set to true if the |
53 // target signal is present and to false otherwise. This method can be called | 53 // target signal is present and to false otherwise. This method can be called |
54 // to know if the data is target signal or interference and process it | 54 // to know if the data is target signal or interference and process it |
55 // accordingly. | 55 // accordingly. |
56 bool is_target_present() override { return is_target_present_; } | 56 bool is_target_present() override { return is_target_present_; } |
57 | 57 |
58 protected: | 58 protected: |
59 // Process one frequency-domain block of audio. This is where the fun | 59 // Process one frequency-domain block of audio. This is where the fun |
60 // happens. Implements LappedTransform::Callback. | 60 // happens. Implements LappedTransform::Callback. |
61 void ProcessAudioBlock(const complex<float>* const* input, | 61 void ProcessAudioBlock(const complex<float>* const* input, |
62 int num_input_channels, | 62 int num_input_channels, |
Andrew MacDonald
2015/07/24 04:01:43
num_input_channels and num_output_channels should
| |
63 int num_freq_bins, | 63 size_t num_freq_bins, |
64 int num_output_channels, | 64 int num_output_channels, |
65 complex<float>* const* output) override; | 65 complex<float>* const* output) override; |
66 | 66 |
67 private: | 67 private: |
68 typedef Matrix<float> MatrixF; | 68 typedef Matrix<float> MatrixF; |
69 typedef ComplexMatrix<float> ComplexMatrixF; | 69 typedef ComplexMatrix<float> ComplexMatrixF; |
70 typedef complex<float> complex_f; | 70 typedef complex<float> complex_f; |
71 | 71 |
72 void InitDelaySumMasks(); | 72 void InitDelaySumMasks(); |
73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. | 73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. |
(...skipping 19 matching lines...) Expand all Loading... | |
93 void ApplyLowFrequencyCorrection(); | 93 void ApplyLowFrequencyCorrection(); |
94 | 94 |
95 // Postfilter masks are also unreliable at high frequencies. Average mid-high | 95 // Postfilter masks are also unreliable at high frequencies. Average mid-high |
96 // frequency masks to calculate a single mask per block which can be applied | 96 // frequency masks to calculate a single mask per block which can be applied |
97 // in the time-domain. Further, we average these block-masks over a chunk, | 97 // in the time-domain. Further, we average these block-masks over a chunk, |
98 // resulting in one postfilter mask per audio chunk. This allows us to skip | 98 // resulting in one postfilter mask per audio chunk. This allows us to skip |
99 // both transforming and blocking the high-frequency signal. | 99 // both transforming and blocking the high-frequency signal. |
100 void ApplyHighFrequencyCorrection(); | 100 void ApplyHighFrequencyCorrection(); |
101 | 101 |
102 // Compute the means needed for the above frequency correction. | 102 // Compute the means needed for the above frequency correction. |
103 float MaskRangeMean(int start_bin, int end_bin); | 103 float MaskRangeMean(size_t start_bin, size_t end_bin); |
104 | 104 |
105 // Applies both sets of masks to |input| and stores the result in |output|. | 105 // Applies both sets of masks to |input| and stores the result in |output|. |
106 void ApplyMasks(const complex_f* const* input, complex_f* const* output); | 106 void ApplyMasks(const complex_f* const* input, complex_f* const* output); |
107 | 107 |
108 void EstimateTargetPresence(); | 108 void EstimateTargetPresence(); |
109 | 109 |
110 static const int kFftSize = 256; | 110 static const size_t kFftSize = 256; |
111 static const int kNumFreqBins = kFftSize / 2 + 1; | 111 static const size_t kNumFreqBins = kFftSize / 2 + 1; |
112 | 112 |
113 // Deals with the fft transform and blocking. | 113 // Deals with the fft transform and blocking. |
114 int chunk_length_; | 114 size_t chunk_length_; |
115 rtc::scoped_ptr<LappedTransform> lapped_transform_; | 115 rtc::scoped_ptr<LappedTransform> lapped_transform_; |
116 float window_[kFftSize]; | 116 float window_[kFftSize]; |
117 | 117 |
118 // Parameters exposed to the user. | 118 // Parameters exposed to the user. |
119 const int num_input_channels_; | 119 const int num_input_channels_; |
120 int sample_rate_hz_; | 120 int sample_rate_hz_; |
121 | 121 |
122 const std::vector<Point> array_geometry_; | 122 const std::vector<Point> array_geometry_; |
123 | 123 |
124 // Calculated based on user-input and constants in the .cc file. | 124 // Calculated based on user-input and constants in the .cc file. |
125 int low_mean_start_bin_; | 125 size_t low_mean_start_bin_; |
126 int low_mean_end_bin_; | 126 size_t low_mean_end_bin_; |
127 int high_mean_start_bin_; | 127 size_t high_mean_start_bin_; |
128 int high_mean_end_bin_; | 128 size_t high_mean_end_bin_; |
129 | 129 |
130 // Quickly varying mask updated every block. | 130 // Quickly varying mask updated every block. |
131 float new_mask_[kNumFreqBins]; | 131 float new_mask_[kNumFreqBins]; |
132 // Time smoothed mask. | 132 // Time smoothed mask. |
133 float time_smooth_mask_[kNumFreqBins]; | 133 float time_smooth_mask_[kNumFreqBins]; |
134 // Time and frequency smoothed mask. | 134 // Time and frequency smoothed mask. |
135 float final_mask_[kNumFreqBins]; | 135 float final_mask_[kNumFreqBins]; |
136 | 136 |
137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_input_channels_|. | 137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_input_channels_|. |
138 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; | 138 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; |
(...skipping 21 matching lines...) Expand all Loading... | |
160 // The microphone normalization factor. | 160 // The microphone normalization factor. |
161 ComplexMatrixF eig_m_; | 161 ComplexMatrixF eig_m_; |
162 | 162 |
163 // For processing the high-frequency input signal. | 163 // For processing the high-frequency input signal. |
164 float high_pass_postfilter_mask_; | 164 float high_pass_postfilter_mask_; |
165 | 165 |
166 // True when the target signal is present. | 166 // True when the target signal is present. |
167 bool is_target_present_; | 167 bool is_target_present_; |
168 // Number of blocks after which the data is considered interference if the | 168 // Number of blocks after which the data is considered interference if the |
169 // mask does not pass |kMaskSignalThreshold|. | 169 // mask does not pass |kMaskSignalThreshold|. |
170 int hold_target_blocks_; | 170 size_t hold_target_blocks_; |
171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. | 171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. |
172 int interference_blocks_count_; | 172 size_t interference_blocks_count_; |
173 }; | 173 }; |
174 | 174 |
175 } // namespace webrtc | 175 } // namespace webrtc |
176 | 176 |
177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
OLD | NEW |