OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
hlundin-webrtc
2016/03/16 12:44:28
2016
peah-webrtc
2016/03/17 13:15:00
Great find!
Done.
| |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 #include <vector> | |
11 | |
12 #include "testing/gtest/include/gtest/gtest.h" | |
13 #include "webrtc/base/array_view.h" | |
14 #include "webrtc/base/random.h" | |
15 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
16 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | |
17 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" | |
18 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" | |
19 | |
20 namespace webrtc { | |
21 namespace { | |
22 | |
23 // Process one frame of data and produce the output. | |
// When sample_rate_hz exceeds AudioProcessing::kSampleRate16kHz, audio_buffer | |
// is split into frequency bands before the suppressor runs and the bands are | |
// merged again afterwards. The suppressor analyzes and then processes the | |
// buffer in place. On return, *speech_probability and *noise_estimate hold the | |
// values the suppressor reports after this frame. | |
// NOTE(review): frame_output is never written by this function. | |
24 void ProcessOneFrame(int sample_rate_hz, | |
25 AudioBuffer* audio_buffer, | |
26 NoiseSuppressionImpl* noise_suppressor, | |
27 std::vector<float>* frame_output, | |
28 float* speech_probability, | |
29 std::vector<float>* noise_estimate) { | |
30 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
31 audio_buffer->SplitIntoFrequencyBands(); | |
32 } | |
33 | |
34 noise_suppressor->AnalyzeCaptureAudio(audio_buffer); | |
35 noise_suppressor->ProcessCaptureAudio(audio_buffer); | |
36 *speech_probability = noise_suppressor->speech_probability(); | |
37 *noise_estimate = noise_suppressor->NoiseEstimate(); | |
38 | |
39 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
40 audio_buffer->MergeFrequencyBands(); | |
41 } | |
42 } | |
43 | |
44 // Forms a predefined random test vector. | |
hlundin-webrtc
2016/03/16 12:44:27
End with '.', not '-'.
hlundin-webrtc
2016/03/16 12:44:28
Please, tell me what the range of the samples is.
peah-webrtc
2016/03/17 13:15:00
Done.
peah-webrtc
2016/03/17 13:15:00
Good point! This part of the code is now removed.
| |
// Resizes *testvector to samples_per_channel * num_channels and sets every | |
// element to scale * (2 * rand_gen->Rand<float>() - 1). The scale is 0.01 when | |
// frame_counter is 60 or more and 1.0 otherwise. | |
// NOTE(review): presumably Rand<float>() is uniform in [0, 1], which would put | |
// the samples in [-scale, scale] - confirm against webrtc/base/random.h. | |
45 void ConstructTestVector(int samples_per_channel, | |
hlundin-webrtc
2016/03/16 12:44:28
Make all int parameters size_t.
peah-webrtc
2016/03/17 13:15:00
Done.
| |
46 int num_channels, | |
47 int frame_counter, | |
48 Random* rand_gen, | |
49 std::vector<float>* testvector) { | |
50 testvector->resize(samples_per_channel * num_channels); | |
51 | |
52 bool low_level = ((frame_counter / 10) > 5); | |
hlundin-webrtc
2016/03/16 12:44:28
Why not
bool low_level = (frame_counter >= 60);
?
peah-webrtc
2016/03/17 13:15:00
Good point! The code is now removed.
| |
53 float scale = (low_level ? 0.01f : 1.0f); | |
hlundin-webrtc
2016/03/16 12:44:28
... or even
const float scale = frame_counter >= 6
peah-webrtc
2016/03/17 13:15:01
Another good point! The code is now removed.
| |
54 | |
55 for (auto& v : *testvector) { | |
56 v = scale * (2.0f * rand_gen->Rand<float>() - 1.0f); | |
57 } | |
58 } | |
59 | |
// Prepares noise_suppressor for a test run: initializes it with num_channels | |
// and sample_rate_hz, enables it, and then applies the requested level. | |
60 void SetupNoiseSuppressor(int sample_rate_hz, | |
61 int num_channels, | |
62 NoiseSuppressionImpl::Level level, | |
63 NoiseSuppressionImpl* noise_suppressor) { | |
64 noise_suppressor->Initialize(num_channels, sample_rate_hz); | |
65 noise_suppressor->Enable(true); | |
66 noise_suppressor->set_level(level); | |
67 } | |
68 | |
69 // Verifies the output of the test against a reference and reports the results | |
70 // using the gtest EXPECT_PRED_FORMAT2 functionality. | |
// output_reference holds the same number of leading samples for each channel, | |
// channel after channel; output holds stream_config.num_frames() samples per | |
// channel, channel after channel. | |
71 void VerifyOutput(const StreamConfig& stream_config, | |
72 float speech_probability_reference, | |
73 const rtc::ArrayView<const float>& noise_estimate_reference, | |
74 const rtc::ArrayView<const float>& output_reference, | |
75 const std::vector<float>& output, | |
hlundin-webrtc
2016/03/16 12:44:27
Nit: the internal order of reference and test para
peah-webrtc
2016/03/17 13:15:00
Done.
| |
76 float speech_probability, | |
77 const std::vector<float>& noise_estimate) { | |
78 // Form vectors to compare the reference to. Only the first values of the | |
79 // outputs are compared to avoid having to specify all preceding frames | |
80 // as testvectors. | |
// NOTE(review): the division below is assumed to be exact; this is not | |
// checked here. | |
81 const size_t reference_frame_length = | |
82 output_reference.size() / stream_config.num_channels(); | |
hlundin-webrtc
2016/03/16 12:44:28
This should be an exact division, right? Consider
peah-webrtc
2016/03/17 13:15:00
Good point! Thanks for the suggestion!
Done.
| |
83 std::vector<float> output_to_verify; | |
// Append the first reference_frame_length samples of each channel; channel c | |
// starts at offset c * stream_config.num_frames() in output. | |
84 for (size_t channel_no = 0; channel_no < stream_config.num_channels(); | |
85 ++channel_no) { | |
86 output_to_verify.insert( | |
87 output_to_verify.end(), | |
88 output.begin() + channel_no * stream_config.num_frames(), | |
89 output.begin() + channel_no * stream_config.num_frames() + | |
90 reference_frame_length); | |
91 } | |
92 | |
// NOTE(review): the helpers are named *NotEqual; presumably they report a | |
// failure when the compared values differ - confirm in bitexactness_tools.h. | |
93 EXPECT_PRED_FORMAT2(test::AssertVectorsNotEqual, output_to_verify, | |
94 output_reference); | |
95 EXPECT_PRED_FORMAT2(test::AssertVectorsNotEqual, noise_estimate, | |
96 noise_estimate_reference); | |
97 EXPECT_PRED_FORMAT2(test::AssertFloatsNotEqual, speech_probability, | |
hlundin-webrtc
2016/03/16 12:44:28
Did you consider using EXPECT_FLOAT_EQ or EXPECT_N
peah-webrtc
2016/03/17 13:15:00
Great suggestion!
Done.
| |
98 speech_probability_reference); | |
99 } | |
100 | |
101 // Processes a specified amount of frames, verifies the results and reports | |
102 // any errors. | |
// sample_rate_hz and num_channels configure the stream, the audio buffer and | |
// the suppressor; num_frames_to_process random frames are generated and fed | |
// through the suppressor at the given level. The three *_reference arguments | |
// are the expected values after the last frame. | |
103 void RunBitexactnessTest( | |
104 int sample_rate_hz, | |
105 int num_channels, | |
hlundin-webrtc
2016/03/16 12:44:28
int -> size_t
peah-webrtc
2016/03/17 13:15:00
Done.
| |
106 int num_frames_to_process, | |
hlundin-webrtc
2016/03/16 12:44:28
int -> size_t
peah-webrtc
2016/03/17 13:15:00
Done.
| |
107 NoiseSuppressionImpl::Level level, | |
108 float speech_probability_reference, | |
109 const rtc::ArrayView<const float>& noise_estimate_reference, | |
110 const rtc::ArrayView<const float>& output_reference) { | |
// The generator is seeded with the constant 42 on every call. | |
111 Random rand_gen(42); | |
// Arithmetically the same as sample_rate_hz / 100, i.e. 10 ms of samples. | |
112 int samples_per_channel = 80 * sample_rate_hz / 8000; | |
hlundin-webrtc
2016/03/16 12:44:28
I think sample_rate_hz / 100 works just as well, w
peah-webrtc
2016/03/17 13:15:00
Done.
| |
113 const StreamConfig stream_config(sample_rate_hz, num_channels, false); | |
114 AudioBuffer audio_buffer( | |
115 stream_config.num_frames(), stream_config.num_channels(), | |
116 stream_config.num_frames(), stream_config.num_channels(), | |
117 stream_config.num_frames()); | |
118 | |
119 rtc::CriticalSection crit; | |
120 NoiseSuppressionImpl noise_suppressor(&crit); | |
121 SetupNoiseSuppressor(sample_rate_hz, num_channels, level, &noise_suppressor); | |
122 | |
// NOTE(review): output is only handed to ProcessOneFrame, which does not | |
// write to it; the samples that get verified come from frame_output. | |
123 std::vector<float> output; | |
124 float speech_probability = 0.0f; | |
125 std::vector<float> noise_estimate; | |
126 std::vector<float> frame_input; | |
127 std::vector<float> frame_output; | |
128 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { | |
hlundin-webrtc
2016/03/16 12:44:28
size_t frame_no
peah-webrtc
2016/03/17 13:15:00
Done.
| |
129 ConstructTestVector(samples_per_channel, num_channels, frame_no, &rand_gen, | |
130 &frame_input); | |
131 | |
132 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); | |
133 | |
134 ProcessOneFrame(sample_rate_hz, &audio_buffer, &noise_suppressor, &output, | |
135 &speech_probability, &noise_estimate); | |
136 | |
137 test::ExtractVectorFromAudioBuffer(stream_config, &audio_buffer, | |
138 &frame_output); | |
139 } | |
140 | |
141 // Compare the output with the reference. Only the first values of the output | |
hlundin-webrtc
2016/03/16 12:44:28
Compare ... with
hlundin-webrtc
2016/03/16 12:44:28
Wonky line-breaks in this paragraph.
peah-webrtc
2016/03/17 13:15:00
Done.
peah-webrtc
2016/03/17 13:15:01
Done.
| |
142 // from the last frame processed | |
143 // are compared to avoid having to specify all preceding frames as | |
hlundin-webrtc
2016/03/16 12:44:27
are compared
peah-webrtc
2016/03/17 13:15:00
Done.
| |
144 // testvectors. | |
145 // As the algorithm being tested has a memory, testing only | |
146 // the last frame implicitly also tests the preceding frames. | |
147 VerifyOutput(stream_config, speech_probability_reference, | |
148 noise_estimate_reference, output_reference, frame_output, | |
149 speech_probability, noise_estimate); | |
150 } | |
151 | |
152 } // namespace | |
153 | |
// Runs 1000 frames of 1-channel, 8000 Hz input at Level::kLow and checks the | |
// result against the references below. Builds with WEBRTC_ANDROID defined use | |
// a separate set of reference values. | |
154 TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLowLevel) { | |
155 #if !defined(WEBRTC_ANDROID) | |
156 const float kOutputReference[] = {0.000921f, 0.003884f, -0.000689f}; | |
157 const float kNoiseEstimateReference[] = {0.028336f, 0.039530f, 0.042970f}; | |
158 const float kSpeechProbabilityReference = 0.122579f; | |
159 #else | |
160 const float kOutputReference[] = {0.000916f, 0.003876f, -0.000702f}; | |
161 const float kNoiseEstimateReference[] = {12.392536f, 13.370509f, 11.658783f}; | |
162 const float kSpeechProbabilityReference = -4.000000f; | |
163 #endif | |
164 | |
165 RunBitexactnessTest(8000, 1, 1000, NoiseSuppression::Level::kLow, | |
166 kSpeechProbabilityReference, | |
167 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
168 rtc::ArrayView<const float>(kOutputReference)); | |
169 } | |
170 | |
// Runs 1000 frames of 1-channel, 16000 Hz input at Level::kLow and checks the | |
// result against the references below. Builds with WEBRTC_ANDROID defined use | |
// a separate set of reference values. | |
171 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLowLevel) { | |
172 #if !defined(WEBRTC_ANDROID) | |
173 const float kOutputReference[] = {0.002048f, 0.001845f, 0.003762f}; | |
174 const float kNoiseEstimateReference[] = {0.032741f, 0.052345f, 0.063557f}; | |
175 const float kSpeechProbabilityReference = 0.110951f; | |
176 #else | |
177 const float kOutputReference[] = {0.002014f, 0.001831f, 0.003754f}; | |
178 const float kNoiseEstimateReference[] = {7.268418f, 8.785124f, 8.383295f}; | |
179 const float kSpeechProbabilityReference = -4.000000f; | |
180 #endif | |
181 | |
182 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kLow, | |
183 kSpeechProbabilityReference, | |
184 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
185 rtc::ArrayView<const float>(kOutputReference)); | |
186 } | |
187 | |
// Runs 1000 frames of 1-channel, 32000 Hz input at Level::kLow and checks the | |
// result against the references below. Builds with WEBRTC_ANDROID defined use | |
// a separate set of reference values. | |
188 TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLowLevel) { | |
189 #if !defined(WEBRTC_ANDROID) | |
190 const float kOutputReference[] = {-0.005249f, 0.001465f, -0.002533f}; | |
191 const float kNoiseEstimateReference[] = {0.025969f, 0.035012f, 0.035499f}; | |
192 const float kSpeechProbabilityReference = 0.139357f; | |
193 #else | |
194 const float kOutputReference[] = {-0.005219f, 0.001373f, -0.002472f}; | |
195 const float kNoiseEstimateReference[] = {12.616668f, 12.766106f, 11.475318f}; | |
196 const float kSpeechProbabilityReference = -4.000000f; | |
197 #endif | |
198 | |
199 RunBitexactnessTest(32000, 1, 1000, NoiseSuppression::Level::kLow, | |
200 kSpeechProbabilityReference, | |
201 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
202 rtc::ArrayView<const float>(kOutputReference)); | |
203 } | |
204 | |
// Runs 1000 frames of 1-channel, 48000 Hz input at Level::kLow and checks the | |
// result against the references below. Builds with WEBRTC_ANDROID defined use | |
// a separate set of reference values. | |
205 TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLowLevel) { | |
206 #if !defined(WEBRTC_ANDROID) | |
207 const float kOutputReference[] = {0.001224f, 0.005314f, 0.002205f}; | |
208 const float kNoiseEstimateReference[] = {0.022175f, 0.031690f, 0.036631f}; | |
209 const float kSpeechProbabilityReference = 0.101083f; | |
210 #else | |
211 const float kOutputReference[] = {0.001181f, 0.005317f, 0.002217f}; | |
212 const float kNoiseEstimateReference[] = {7.594315f, 9.309500f, 9.561249f}; | |
213 const float kSpeechProbabilityReference = -4.000000f; | |
214 #endif | |
215 | |
216 RunBitexactnessTest(48000, 1, 1000, NoiseSuppression::Level::kLow, | |
217 kSpeechProbabilityReference, | |
218 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
219 rtc::ArrayView<const float>(kOutputReference)); | |
220 } | |
221 | |
// Runs 1000 frames of 2-channel, 16000 Hz input at Level::kLow and checks the | |
// result against the references below. kOutputReference holds six values, | |
// i.e. three leading samples for each of the two channels. Builds with | |
// WEBRTC_ANDROID defined use a separate set of reference values. | |
222 TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLowLevel) { | |
223 #if !defined(WEBRTC_ANDROID) | |
224 const float kOutputReference[] = {0.000954f, 0.002081f, -0.001125f, | |
225 -0.003688f, 0.004999f, -0.004168f}; | |
226 const float kNoiseEstimateReference[] = {0.037820f, 0.054766f, 0.057829f}; | |
227 const float kSpeechProbabilityReference = 0.106168f; | |
228 #else | |
229 const float kOutputReference[] = {0.000946f, 0.002106f, -0.001099f, | |
230 -0.003693f, 0.004975f, -0.004181f}; | |
231 const float kNoiseEstimateReference[] = {9.096287f, 8.000648f, 8.823565f}; | |
232 const float kSpeechProbabilityReference = -4.000000f; | |
233 #endif | |
234 | |
235 RunBitexactnessTest(16000, 2, 1000, NoiseSuppression::Level::kLow, | |
236 kSpeechProbabilityReference, | |
237 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
238 rtc::ArrayView<const float>(kOutputReference)); | |
239 } | |
240 | |
// Runs 1000 frames of 1-channel, 16000 Hz input at Level::kModerate and checks | |
// the result against the references below. Builds with WEBRTC_ANDROID defined | |
// use a separate set of reference values. | |
241 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerateLevel) { | |
242 #if !defined(WEBRTC_ANDROID) | |
243 const float kOutputReference[] = {0.001036f, 0.000783f, 0.001862f}; | |
244 const float kNoiseEstimateReference[] = {0.032994f, 0.052608f, 0.065461f}; | |
245 const float kSpeechProbabilityReference = 0.110952f; | |
246 #else | |
247 const float kOutputReference[] = {0.000977f, 0.000763f, 0.001801f}; | |
248 const float kNoiseEstimateReference[] = {7.269972f, 8.785130f, 8.383298f}; | |
249 const float kSpeechProbabilityReference = -4.000000f; | |
250 #endif | |
251 | |
252 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kModerate, | |
253 kSpeechProbabilityReference, | |
254 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
255 rtc::ArrayView<const float>(kOutputReference)); | |
256 } | |
257 | |
// Runs 1000 frames of 1-channel, 16000 Hz input at Level::kHigh and checks the | |
// result against the references below. Builds with WEBRTC_ANDROID defined use | |
// a separate set of reference values. | |
258 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHighLevel) { | |
259 #if !defined(WEBRTC_ANDROID) | |
260 const float kOutputReference[] = {0.000533f, 0.000372f, 0.000948f}; | |
261 const float kNoiseEstimateReference[] = {0.037815f, 0.055497f, 0.065549f}; | |
262 const float kSpeechProbabilityReference = 0.110951f; | |
263 #else | |
264 const float kOutputReference[] = {0.000519f, 0.000336f, 0.000885f}; | |
265 const float kNoiseEstimateReference[] = {7.271456f, 8.785111f, 8.383295f}; | |
266 const float kSpeechProbabilityReference = -4.000000f; | |
267 #endif | |
268 | |
269 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kHigh, | |
270 kSpeechProbabilityReference, | |
271 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
272 rtc::ArrayView<const float>(kOutputReference)); | |
273 } | |
274 | |
// Runs 1000 frames of 1-channel, 16000 Hz input at Level::kVeryHigh and checks | |
// the result against the references below. Builds with WEBRTC_ANDROID defined | |
// use a separate set of reference values. | |
275 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHighLevel) { | |
276 #if !defined(WEBRTC_ANDROID) | |
277 const float kOutputReference[] = {0.000367f, 0.000259f, 0.000647f}; | |
278 const float kNoiseEstimateReference[] = {0.038476f, 0.055677f, 0.065570f}; | |
279 const float kSpeechProbabilityReference = 0.110951f; | |
280 #else | |
281 const float kOutputReference[] = {0.000336f, 0.000244f, 0.000610f}; | |
282 const float kNoiseEstimateReference[] = {7.272960f, 8.785113f, 8.383295f}; | |
283 const float kSpeechProbabilityReference = -4.000000f; | |
284 #endif | |
285 | |
286 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kVeryHigh, | |
287 kSpeechProbabilityReference, | |
288 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
289 rtc::ArrayView<const float>(kOutputReference)); | |
290 } | |
291 | |
292 } // namespace webrtc | |
OLD | NEW |