OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
53 float var_decay_rate; | 53 float var_decay_rate; |
54 size_t var_window_size; | 54 size_t var_window_size; |
55 int analysis_rate; | 55 int analysis_rate; |
56 float gain_change_limit; | 56 float gain_change_limit; |
57 float rho; | 57 float rho; |
58 }; | 58 }; |
59 | 59 |
60 explicit IntelligibilityEnhancer(const Config& config); | 60 explicit IntelligibilityEnhancer(const Config& config); |
61 IntelligibilityEnhancer(); // Initialize with default config. | 61 IntelligibilityEnhancer(); // Initialize with default config. |
62 | 62 |
63 // Reads and processes chunk of noise stream in time domain. | 63 // Sets the capture noise estimate. |
hlundin-webrtc
2016/02/08 10:29:28
"Noise estimate" is a bit generic. It is the noise
aluebs-webrtc
2016/02/09 00:19:15
Right. Improved the comment.
| |
64 void AnalyzeCaptureAudio(float* const* audio, | 64 void SetCaptureNoiseEstimate(const std::vector<float>& noise); |
65 int sample_rate_hz, | |
66 size_t num_channels); | |
67 | 65 |
68 // Reads chunk of speech in time domain and updates with modified signal. | 66 // Reads chunk of speech in time domain and updates with modified signal. |
69 void ProcessRenderAudio(float* const* audio, | 67 void ProcessRenderAudio(float* const* audio, |
70 int sample_rate_hz, | 68 int sample_rate_hz, |
71 size_t num_channels); | 69 size_t num_channels); |
72 bool active() const; | 70 bool active() const; |
73 | 71 |
74 private: | 72 private: |
75 enum AudioSource { | |
76 kRenderStream = 0, // Clear speech stream. | |
77 kCaptureStream, // Noise stream. | |
78 }; | |
79 | |
80 // Provides access point to the frequency domain. | 73 // Provides access point to the frequency domain. |
81 class TransformCallback : public LappedTransform::Callback { | 74 class TransformCallback : public LappedTransform::Callback { |
82 public: | 75 public: |
83 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); | 76 TransformCallback(IntelligibilityEnhancer* parent); |
84 | 77 |
85 // All in frequency domain, receives input |in_block|, applies | 78 // All in frequency domain, receives input |in_block|, applies |
86 // intelligibility enhancement, and writes result to |out_block|. | 79 // intelligibility enhancement, and writes result to |out_block|. |
87 void ProcessAudioBlock(const std::complex<float>* const* in_block, | 80 void ProcessAudioBlock(const std::complex<float>* const* in_block, |
88 size_t in_channels, | 81 size_t in_channels, |
89 size_t frames, | 82 size_t frames, |
90 size_t out_channels, | 83 size_t out_channels, |
91 std::complex<float>* const* out_block) override; | 84 std::complex<float>* const* out_block) override; |
92 | 85 |
93 private: | 86 private: |
94 IntelligibilityEnhancer* parent_; | 87 IntelligibilityEnhancer* parent_; |
95 AudioSource source_; | |
96 }; | 88 }; |
97 friend class TransformCallback; | 89 friend class TransformCallback; |
98 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); | 90 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); |
99 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); | 91 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); |
100 | 92 |
101 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source. | |
102 void DispatchAudio(AudioSource source, | |
103 const std::complex<float>* in_block, | |
104 std::complex<float>* out_block); | |
105 | |
106 // Updates variance computation and analysis with |in_block|, | 93 // Updates variance computation and analysis with |in_block|, |
107 // and writes modified speech to |out_block|. | 94 // and writes modified speech to |out_block|. |
108 void ProcessClearBlock(const std::complex<float>* in_block, | 95 void ProcessClearBlock(const std::complex<float>* in_block, |
109 std::complex<float>* out_block); | 96 std::complex<float>* out_block); |
110 | 97 |
111 // Computes and sets modified gains. | 98 // Computes and sets modified gains. |
112 void AnalyzeClearBlock(float power_target); | 99 void AnalyzeClearBlock(float power_target); |
113 | 100 |
114 // Bisection search for optimal |lambda|. | 101 // Bisection search for optimal |lambda|. |
115 void SolveForLambda(float power_target, float power_bot, float power_top); | 102 void SolveForLambda(float power_target, float power_bot, float power_top); |
116 | 103 |
117 // Transforms freq gains to ERB gains. | 104 // Transforms freq gains to ERB gains. |
118 void UpdateErbGains(); | 105 void UpdateErbGains(); |
119 | 106 |
120 // Updates variance calculation for noise input with |in_block|. | |
121 void ProcessNoiseBlock(const std::complex<float>* in_block, | |
122 std::complex<float>* out_block); | |
123 | |
124 // Returns number of ERB filters. | 107 // Returns number of ERB filters. |
125 static size_t GetBankSize(int sample_rate, size_t erb_resolution); | 108 static size_t GetBankSize(int sample_rate, size_t erb_resolution); |
126 | 109 |
127 // Initializes ERB filterbank. | 110 // Initializes ERB filterbank. |
128 void CreateErbBank(); | 111 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs); |
129 | 112 |
130 // Analytically solves quadratic for optimal gains given |lambda|. | 113 // Analytically solves quadratic for optimal gains given |lambda|. |
131 // Negative gains are set to 0. Stores the results in |sols|. | 114 // Negative gains are set to 0. Stores the results in |sols|. |
132 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); | 115 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); |
133 | 116 |
134 // Computes variance across ERB filters from freq variance |var|. | 117 // Computes variance across ERB filters from freq variance |var|. |
135 // Stores in |result|. | 118 // Stores in |result|. |
136 void FilterVariance(const float* var, float* result); | 119 void FilterVariance(const float* var, |
120 const std::vector<std::vector<float>>& filter_bank, | |
121 float* result); | |
137 | 122 |
138 // Returns dot product of vectors specified by size |length| arrays |a|,|b|. | 123 // Returns dot product of vectors specified by size |length| arrays |a|,|b|. |
139 static float DotProduct(const float* a, const float* b, size_t length); | 124 static float DotProduct(const float* a, const float* b, size_t length); |
140 | 125 |
141 const size_t freqs_; // Num frequencies in frequency domain. | 126 const size_t freqs_; // Num frequencies in frequency domain. |
142 const size_t window_size_; // Window size in samples; also the block size. | 127 const size_t window_size_; // Window size in samples; also the block size. |
143 const size_t chunk_length_; // Chunk size in samples. | 128 const size_t chunk_length_; // Chunk size in samples. |
144 const size_t bank_size_; // Num ERB filters. | 129 const size_t bank_size_; // Num ERB filters. |
145 const int sample_rate_hz_; | 130 const int sample_rate_hz_; |
146 const int erb_resolution_; | 131 const int erb_resolution_; |
147 const size_t num_capture_channels_; | 132 const size_t num_capture_channels_; |
148 const size_t num_render_channels_; | 133 const size_t num_render_channels_; |
149 const int analysis_rate_; // Num blocks before gains recalculated. | 134 const int analysis_rate_; // Num blocks before gains recalculated. |
150 | 135 |
151 const bool active_; // Whether render gains are being updated. | 136 const bool active_; // Whether render gains are being updated. |
152 // TODO(ekm): Add logic for updating |active_|. | 137 // TODO(ekm): Add logic for updating |active_|. |
153 | 138 |
154 intelligibility::VarianceArray clear_variance_; | 139 intelligibility::VarianceArray clear_variance_; |
155 intelligibility::VarianceArray noise_variance_; | 140 std::vector<float> noise_power_; |
156 rtc::scoped_ptr<float[]> filtered_clear_var_; | 141 rtc::scoped_ptr<float[]> filtered_clear_var_; |
157 rtc::scoped_ptr<float[]> filtered_noise_var_; | 142 rtc::scoped_ptr<float[]> filtered_noise_var_; |
158 std::vector<std::vector<float>> filter_bank_; | |
159 rtc::scoped_ptr<float[]> center_freqs_; | 143 rtc::scoped_ptr<float[]> center_freqs_; |
144 std::vector<std::vector<float>> capture_filter_bank_; | |
145 std::vector<std::vector<float>> render_filter_bank_; | |
160 size_t start_freq_; | 146 size_t start_freq_; |
161 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR. | 147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR. |
162 // for each ERB band. | 148 // for each ERB band. |
163 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains. | 149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains. |
164 intelligibility::GainApplier gain_applier_; | 150 intelligibility::GainApplier gain_applier_; |
165 | 151 |
166 // Destination buffers used to reassemble blocked chunks before overwriting | 152 // Destination buffers used to reassemble blocked chunks before overwriting |
167 // the original input array with modifications. | 153 // the original input array with modifications. |
168 ChannelBuffer<float> temp_render_out_buffer_; | 154 ChannelBuffer<float> temp_render_out_buffer_; |
169 ChannelBuffer<float> temp_capture_out_buffer_; | |
170 | 155 |
171 rtc::scoped_ptr<float[]> kbd_window_; | 156 rtc::scoped_ptr<float[]> kbd_window_; |
172 TransformCallback render_callback_; | 157 TransformCallback render_callback_; |
173 TransformCallback capture_callback_; | |
174 rtc::scoped_ptr<LappedTransform> render_mangler_; | 158 rtc::scoped_ptr<LappedTransform> render_mangler_; |
175 rtc::scoped_ptr<LappedTransform> capture_mangler_; | |
176 int block_count_; | 159 int block_count_; |
177 int analysis_step_; | 160 int analysis_step_; |
178 }; | 161 }; |
179 | 162 |
180 } // namespace webrtc | 163 } // namespace webrtc |
181 | 164 |
182 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 165 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ |
OLD | NEW |