| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
| 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
| 13 | 13 |
| 14 #include <vector> | 14 #include <vector> |
| 15 | 15 |
| 16 #include "webrtc/common_audio/lapped_transform.h" | 16 #include "webrtc/common_audio/lapped_transform.h" |
| 17 #include "webrtc/common_audio/channel_buffer.h" | 17 #include "webrtc/common_audio/channel_buffer.h" |
| 18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" | 18 #include "webrtc/modules/audio_processing/beamformer/beamformer.h" |
| 19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" | 19 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" |
| | 20 #include "webrtc/system_wrappers/interface/scoped_vector.h" |
| 20 | 21 |
| 21 namespace webrtc { | 22 namespace webrtc { |
| 22 | 23 |
| 23 // Enhances sound sources coming directly in front of a uniform linear array | 24 // Enhances sound sources coming directly in front of a uniform linear array |
| 24 // and suppresses sound sources coming from all other directions. Operates on | 25 // and suppresses sound sources coming from all other directions. Operates on |
| 25 // multichannel signals and produces single-channel output. | 26 // multichannel signals and produces single-channel output. |
| 26 // | 27 // |
| 27 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear | 28 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear |
| 28 // Beamforming Postprocessor" by Bastiaan Kleijn. | 29 // Beamforming Postprocessor" by Bastiaan Kleijn. |
| 29 // | |
| 30 // TODO(aluebs): Target angle assumed to be 0. Parameterize target angle. | |
| 31 class NonlinearBeamformer | 30 class NonlinearBeamformer |
| 32 : public Beamformer<float>, | 31 : public Beamformer<float>, |
| 33 public LappedTransform::Callback { | 32 public LappedTransform::Callback { |
| 34 public: | 33 public: |
| 35 // At the moment it only accepts uniform linear microphone arrays. Using the | |
| 36 // first microphone as a reference position [0, 0, 0] is a natural choice. | |
| 37 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); | 34 explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); |
| 38 | 35 |
| 39 // Sample rate corresponds to the lower band. | 36 // Sample rate corresponds to the lower band. |
| 40 // Needs to be called before the NonlinearBeamformer can be used. | 37 // Needs to be called before the NonlinearBeamformer can be used. |
| 41 void Initialize(int chunk_size_ms, int sample_rate_hz) override; | 38 void Initialize(int chunk_size_ms, int sample_rate_hz) override; |
| 42 | 39 |
| 43 // Process one time-domain chunk of audio. The audio is expected to be split | 40 // Process one time-domain chunk of audio. The audio is expected to be split |
| 44 // into frequency bands inside the ChannelBuffer. The number of frames and | 41 // into frequency bands inside the ChannelBuffer. The number of frames and |
| 45 // channels must correspond to the constructor parameters. The same | 42 // channels must correspond to the constructor parameters. The same |
| 46 // ChannelBuffer can be passed in as |input| and |output|. | 43 // ChannelBuffer can be passed in as |input| and |output|. |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 62 int num_input_channels, | 59 int num_input_channels, |
| 63 size_t num_freq_bins, | 60 size_t num_freq_bins, |
| 64 int num_output_channels, | 61 int num_output_channels, |
| 65 complex<float>* const* output) override; | 62 complex<float>* const* output) override; |
| 66 | 63 |
| 67 private: | 64 private: |
| 68 typedef Matrix<float> MatrixF; | 65 typedef Matrix<float> MatrixF; |
| 69 typedef ComplexMatrix<float> ComplexMatrixF; | 66 typedef ComplexMatrix<float> ComplexMatrixF; |
| 70 typedef complex<float> complex_f; | 67 typedef complex<float> complex_f; |
| 71 | 68 |
| | 69 void InitInterfAngles(); |
| 72 void InitDelaySumMasks(); | 70 void InitDelaySumMasks(); |
| 73 void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. | 71 void InitTargetCovMats(); |
| 74 void InitInterfCovMats(); | 72 void InitInterfCovMats(); |
| 75 | 73 |
| 76 // An implementation of equation 18, which calculates postfilter masks that, | 74 // Calculates postfilter masks that minimize the mean squared error of our |
| 77 // when applied, minimize the mean-square error of our estimation of the | 75 // estimation of the desired signal. |
| 78 // desired signal. A sub-task is to calculate lambda, which is solved via | |
| 79 // equation 13. | |
| 80 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, | 76 float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, |
| 81 float rpsiw, | 77 float rpsiw, |
| 82 float ratio_rxiw_rxim, | 78 float ratio_rxiw_rxim, |
| 83 float rmxi_r, | 79 float rmxi_r); |
| 84 float mask_threshold); | |
| 85 | 80 |
| 86 // Prevents the postfilter masks from degenerating too quickly (a cause of | 81 // Prevents the postfilter masks from degenerating too quickly (a cause of |
| 87 // musical noise). | 82 // musical noise). |
| 88 void ApplyMaskTimeSmoothing(); | 83 void ApplyMaskTimeSmoothing(); |
| 89 void ApplyMaskFrequencySmoothing(); | 84 void ApplyMaskFrequencySmoothing(); |
| 90 | 85 |
| 91 // The postfilter masks are unreliable at low frequencies. Calculates a better | 86 // The postfilter masks are unreliable at low frequencies. Calculates a better |
| 92 // mask by averaging mid-low frequency values. | 87 // mask by averaging mid-low frequency values. |
| 93 void ApplyLowFrequencyCorrection(); | 88 void ApplyLowFrequencyCorrection(); |
| 94 | 89 |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 127 size_t high_mean_start_bin_; | 122 size_t high_mean_start_bin_; |
| 128 size_t high_mean_end_bin_; | 123 size_t high_mean_end_bin_; |
| 129 | 124 |
| 130 // Quickly varying mask updated every block. | 125 // Quickly varying mask updated every block. |
| 131 float new_mask_[kNumFreqBins]; | 126 float new_mask_[kNumFreqBins]; |
| 132 // Time smoothed mask. | 127 // Time smoothed mask. |
| 133 float time_smooth_mask_[kNumFreqBins]; | 128 float time_smooth_mask_[kNumFreqBins]; |
| 134 // Time and frequency smoothed mask. | 129 // Time and frequency smoothed mask. |
| 135 float final_mask_[kNumFreqBins]; | 130 float final_mask_[kNumFreqBins]; |
| 136 | 131 |
| | 132 // Angles of the interferer scenarios. |
| | 133 std::vector<float> interf_angles_radians_; |
| | 134 |
| 137 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. | 135 // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. |
| 138 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; | 136 ComplexMatrixF delay_sum_masks_[kNumFreqBins]; |
| 139 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; | 137 ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; |
| 140 | 138 |
| 141 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x | 139 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
| 142 // |num_input_channels_|. | 140 // |num_input_channels_|. |
| 143 ComplexMatrixF target_cov_mats_[kNumFreqBins]; | 141 ComplexMatrixF target_cov_mats_[kNumFreqBins]; |
| 144 | 142 |
| 145 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x | 143 // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x |
| 146 // |num_input_channels_|. | 144 // |num_input_channels_|. ScopedVector has a size equal to the number of |
| 147 ComplexMatrixF interf_cov_mats_[kNumFreqBins]; | 145 // interferer scenarios. |
| 148 ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; | 146 ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins]; |
| 149 | 147 |
| 150 // Of length |kNumFreqBins|. | 148 // Of length |kNumFreqBins|. |
| 151 float mask_thresholds_[kNumFreqBins]; | |
| 152 float wave_numbers_[kNumFreqBins]; | 149 float wave_numbers_[kNumFreqBins]; |
| 153 | 150 |
| 154 // Preallocated for ProcessAudioBlock() | 151 // Preallocated for ProcessAudioBlock() |
| 155 // Of length |kNumFreqBins|. | 152 // Of length |kNumFreqBins|. |
| 156 float rxiws_[kNumFreqBins]; | 153 float rxiws_[kNumFreqBins]; |
| 157 float rpsiws_[kNumFreqBins]; | 154 // The vector has a size equal to the number of interferer scenarios. |
| 158 float reflected_rpsiws_[kNumFreqBins]; | 155 std::vector<float> rpsiws_[kNumFreqBins]; |
| 159 | 156 |
| 160 // The microphone normalization factor. | 157 // The microphone normalization factor. |
| 161 ComplexMatrixF eig_m_; | 158 ComplexMatrixF eig_m_; |
| 162 | 159 |
| 163 // For processing the high-frequency input signal. | 160 // For processing the high-frequency input signal. |
| 164 float high_pass_postfilter_mask_; | 161 float high_pass_postfilter_mask_; |
| 165 | 162 |
| 166 // True when the target signal is present. | 163 // True when the target signal is present. |
| 167 bool is_target_present_; | 164 bool is_target_present_; |
| 168 // Number of blocks after which the data is considered interference if the | 165 // Number of blocks after which the data is considered interference if the |
| 169 // mask does not pass |kMaskSignalThreshold|. | 166 // mask does not pass |kMaskSignalThreshold|. |
| 170 size_t hold_target_blocks_; | 167 size_t hold_target_blocks_; |
| 171 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. | 168 // Number of blocks since the last mask that passed |kMaskSignalThreshold|. |
| 172 size_t interference_blocks_count_; | 169 size_t interference_blocks_count_; |
| 173 }; | 170 }; |
| 174 | 171 |
| 175 } // namespace webrtc | 172 } // namespace webrtc |
| 176 | 173 |
| 177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ | 174 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ |
| OLD | NEW |