OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
hlundin-webrtc
2016/03/16 15:50:20
2016
peah-webrtc
2016/03/18 05:56:05
Done.
| |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 #include <vector> | |
11 | |
12 #include "testing/gtest/include/gtest/gtest.h" | |
13 #include "webrtc/base/array_view.h" | |
14 #include "webrtc/base/random.h" | |
15 #include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h" | |
16 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
17 #include "webrtc/modules/audio_processing/voice_detection_impl.h" | |
18 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" | |
19 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" | |
20 #include "webrtc/test/testsupport/fileutils.h" | |
21 | |
22 namespace webrtc { | |
23 namespace { | |
24 | |
25 enum TestSignalLevels { kLow, kMedium, kHigh }; | |
26 | |
27 ::testing::AssertionResult AssertLikelihoodsNotEqual( | |
hlundin-webrtc
2016/03/16 15:50:20
This is a lot of code to compare two enum (integer
peah-webrtc
2016/03/18 05:56:05
Done.
| |
28 const char* m_expr, | |
29 const char* n_expr, | |
30 const VoiceDetection::Likelihood& output, | |
31 const VoiceDetection::Likelihood& reference) { | |
32 // If the values are deemed not to be similar, return a report of the | |
33 // difference. | |
34 if (output != reference) { | |
35 // Lambda function that produces a string containing the likelihood name. | |
36 auto likelihood_description = [](VoiceDetection::Likelihood likelihood) { | |
37 switch (likelihood) { | |
38 case VoiceDetection::kVeryLowLikelihood: | |
39 return std::string("kVeryLowLikelihood"); | |
40 break; | |
41 case VoiceDetection::kLowLikelihood: | |
42 return std::string("kLowLikelihood"); | |
43 break; | |
44 case VoiceDetection::kModerateLikelihood: | |
45 return std::string("kModerateLikelihood"); | |
46 break; | |
47 case VoiceDetection::kHighLikelihood: | |
48 return std::string("kHighLikelihood"); | |
49 break; | |
50 default: | |
51 RTC_DCHECK(false); | |
52 return std::string(""); | |
53 } | |
54 }; | |
55 | |
56 return ::testing::AssertionFailure() | |
57 << "Actual: " << likelihood_description(output) << std::endl | |
58 << "Expected: " << likelihood_description(reference) << std::endl; | |
59 } | |
60 return ::testing::AssertionSuccess(); | |
61 } | |
62 | |
63 // Process one frame of data and produce the output. | |
64 void ProcessOneFrame(int sample_rate_hz, | |
65 AudioBuffer* audio_buffer, | |
66 VoiceDetectionImpl* voice_detection, | |
67 int* frame_size_ms, | |
68 bool* stream_has_voice, | |
69 VoiceDetection::Likelihood* likelihood) { | |
70 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
71 audio_buffer->SplitIntoFrequencyBands(); | |
72 } | |
73 | |
74 voice_detection->ProcessCaptureAudio(audio_buffer); | |
75 | |
76 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
hlundin-webrtc
2016/03/16 15:50:20
What is the rationale for merging the bands again?
peah-webrtc
2016/03/18 05:56:05
No point at all in this case, I will remove that.
| |
77 audio_buffer->MergeFrequencyBands(); | |
78 } | |
79 | |
80 *frame_size_ms = voice_detection->frame_size_ms(); | |
81 *stream_has_voice = voice_detection->stream_has_voice(); | |
82 *likelihood = voice_detection->likelihood(); | |
83 } | |
84 | |
85 // Forms a predefined random test vector. | |
86 void ConstructTestVector(int samples_per_channel, | |
87 int num_channels, | |
88 test::AudioLoop* audio_loop, | |
89 TestSignalLevels signal_level, | |
90 std::vector<float>* testvector) { | |
91 testvector->resize(samples_per_channel * num_channels); | |
92 auto input_samples = audio_loop->GetNextBlock(); | |
93 | |
94 float signal_gain = 0.0f; | |
hlundin-webrtc
2016/03/16 15:50:20
Consider:
float signal_gain = 0.1f; // TestSigna
peah-webrtc
2016/03/18 05:56:05
Thanks. This code is now removed.
| |
95 switch (signal_level) { | |
96 case TestSignalLevels::kLow: | |
97 signal_gain = 0.1f; | |
98 break; | |
99 case TestSignalLevels::kMedium: | |
100 signal_gain = 0.5f; | |
101 break; | |
102 case TestSignalLevels::kHigh: | |
103 signal_gain = 1.0f; | |
104 break; | |
105 default: | |
106 RTC_DCHECK(false); | |
107 } | |
108 | |
109 for (int k = 0; k < samples_per_channel; ++k) { | |
110 for (int j = 0; j < num_channels; ++j) { | |
111 (*testvector)[k * num_channels + j] = | |
112 signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f; | |
113 } | |
114 } | |
115 } | |
116 | |
117 void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) { | |
118 voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz); | |
119 voice_detection->Enable(true); | |
120 } | |
121 | |
122 std::string GetTestVectorFileName(int sample_rate_hz) { | |
hlundin-webrtc
2016/03/16 15:50:20
You can get rid of this function if you follow my
peah-webrtc
2016/03/18 05:56:05
Done.
| |
123 switch (sample_rate_hz) { | |
124 case 8000: | |
125 // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since | |
126 // the test is only a bitexactness test. | |
127 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", | |
128 "pcm"); | |
129 case 16000: | |
130 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", | |
131 "pcm"); | |
132 case 32000: | |
133 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", | |
134 "pcm"); | |
135 case 48000: | |
136 // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since | |
137 // the test is only a bitexactness test. | |
138 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", | |
139 "pcm"); | |
140 default: | |
141 RTC_DCHECK(false); | |
142 } | |
143 | |
144 return ""; | |
145 } | |
146 | |
147 // Processes a specified amount of frames, verifies the results and reports | |
148 // any errors. | |
149 void RunBitexactnessTest(int sample_rate_hz, | |
150 int num_channels, | |
151 int num_frames_to_process, | |
152 TestSignalLevels signal_level, | |
153 int frame_size_ms_reference, | |
154 bool stream_has_voice_reference, | |
155 VoiceDetection::Likelihood likelihood_reference) { | |
156 test::AudioLoop audio_loop; | |
hlundin-webrtc
2016/03/16 15:50:20
I would actually recommend you use a ResampleInput
peah-webrtc
2016/03/18 05:56:05
Thanks for the suggestion!!! I found some Resource
| |
157 int samples_per_channel = 80 * sample_rate_hz / 8000; | |
158 const StreamConfig stream_config(sample_rate_hz, num_channels, false); | |
159 AudioBuffer audio_buffer( | |
160 stream_config.num_frames(), stream_config.num_channels(), | |
161 stream_config.num_frames(), stream_config.num_channels(), | |
162 stream_config.num_frames()); | |
163 | |
164 std::string filename; | |
hlundin-webrtc
2016/03/16 15:50:20
Not used.
peah-webrtc
2016/03/18 05:56:05
Done.
| |
165 bool success = audio_loop.Init( | |
166 GetTestVectorFileName(sample_rate_hz), | |
167 num_frames_to_process * samples_per_channel * num_channels, | |
168 samples_per_channel * num_channels); | |
169 RTC_DCHECK(success); | |
hlundin-webrtc
2016/03/16 15:50:20
This is test code; you might as well CHECK things
peah-webrtc
2016/03/18 05:56:05
Good point!!!
Removed this one but will change oth
| |
170 | |
171 rtc::CriticalSection crit; | |
172 VoiceDetectionImpl voice_detection(&crit); | |
173 SetupComponent(sample_rate_hz, &voice_detection); | |
174 | |
175 std::vector<float> frame_input; | |
176 int frame_size_ms; | |
177 bool stream_has_voice; | |
178 VoiceDetection::Likelihood likelihood; | |
179 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { | |
180 ConstructTestVector(samples_per_channel, num_channels, &audio_loop, | |
181 signal_level, &frame_input); | |
182 | |
183 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); | |
184 | |
185 ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection, | |
186 &frame_size_ms, &stream_has_voice, &likelihood); | |
187 } | |
188 | |
189 // Compare the outputs to the references. | |
190 EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms, | |
hlundin-webrtc
2016/03/16 15:50:20
I think all of these should be changed to regular
peah-webrtc
2016/03/18 05:56:05
Done.
| |
191 frame_size_ms_reference); | |
192 EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice, | |
193 stream_has_voice_reference); | |
194 EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood, | |
195 likelihood_reference); | |
196 } | |
197 | |
198 const int kNumFramesToProcess = 1000; | |
199 | |
200 } // namespace | |
201 | |
202 TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) { | |
203 const int kFrameSizeMsReference = 10; | |
hlundin-webrtc
2016/03/16 15:50:20
kFrameSizeMsReference is always 10; define it once
peah-webrtc
2016/03/18 05:56:05
Done.
| |
204 const bool kStreamHAsVoiceReference = true; | |
hlundin-webrtc
2016/03/16 15:50:20
HAs -> Has
hlundin-webrtc
2016/03/16 15:50:20
Define once before the TESTs.
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:06
Done.
| |
205 const VoiceDetection::Likelihood kLlikelihoodReference = | |
hlundin-webrtc
2016/03/16 15:50:20
... and define it before the TESTs.
hlundin-webrtc
2016/03/16 15:50:20
kLli -> kLi
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:05
Done.
| |
206 VoiceDetection::kLowLikelihood; | |
207 | |
208 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
209 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
210 kLlikelihoodReference); | |
211 } | |
212 | |
213 TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) { | |
214 const int kFrameSizeMsReference = 10; | |
215 const bool kStreamHAsVoiceReference = true; | |
216 const VoiceDetection::Likelihood kLlikelihoodReference = | |
217 VoiceDetection::kLowLikelihood; | |
218 | |
219 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
220 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
221 kLlikelihoodReference); | |
222 } | |
223 | |
224 TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) { | |
225 const int kFrameSizeMsReference = 10; | |
226 const bool kStreamHAsVoiceReference = true; | |
227 const VoiceDetection::Likelihood kLlikelihoodReference = | |
228 VoiceDetection::kLowLikelihood; | |
229 | |
230 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
231 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
232 kLlikelihoodReference); | |
233 } | |
234 | |
235 TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) { | |
236 const int kFrameSizeMsReference = 10; | |
237 const bool kStreamHAsVoiceReference = true; | |
238 const VoiceDetection::Likelihood kLlikelihoodReference = | |
239 VoiceDetection::kLowLikelihood; | |
240 | |
241 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
242 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
243 kLlikelihoodReference); | |
244 } | |
245 | |
246 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) { | |
247 const int kFrameSizeMsReference = 10; | |
248 const bool kStreamHAsVoiceReference = true; | |
249 const VoiceDetection::Likelihood kLlikelihoodReference = | |
250 VoiceDetection::kLowLikelihood; | |
251 | |
252 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow, | |
253 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
254 kLlikelihoodReference); | |
255 } | |
256 | |
257 TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) { | |
258 const int kFrameSizeMsReference = 10; | |
259 const bool kStreamHAsVoiceReference = true; | |
260 const VoiceDetection::Likelihood kLlikelihoodReference = | |
261 VoiceDetection::kLowLikelihood; | |
262 | |
263 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
264 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
265 kLlikelihoodReference); | |
266 } | |
267 | |
268 TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) { | |
269 const int kFrameSizeMsReference = 10; | |
270 const bool kStreamHAsVoiceReference = true; | |
271 const VoiceDetection::Likelihood kLlikelihoodReference = | |
272 VoiceDetection::kLowLikelihood; | |
273 | |
274 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
275 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
276 kLlikelihoodReference); | |
277 } | |
278 | |
279 TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) { | |
280 const int kFrameSizeMsReference = 10; | |
281 const bool kStreamHAsVoiceReference = true; | |
282 const VoiceDetection::Likelihood kLlikelihoodReference = | |
283 VoiceDetection::kLowLikelihood; | |
284 | |
285 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
286 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
287 kLlikelihoodReference); | |
288 } | |
289 | |
290 TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) { | |
291 const int kFrameSizeMsReference = 10; | |
292 const bool kStreamHAsVoiceReference = true; | |
293 const VoiceDetection::Likelihood kLlikelihoodReference = | |
294 VoiceDetection::kLowLikelihood; | |
295 | |
296 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
297 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
298 kLlikelihoodReference); | |
299 } | |
300 | |
301 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) { | |
302 const int kFrameSizeMsReference = 10; | |
303 const bool kStreamHAsVoiceReference = true; | |
304 const VoiceDetection::Likelihood kLlikelihoodReference = | |
305 VoiceDetection::kLowLikelihood; | |
306 | |
307 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium, | |
308 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
309 kLlikelihoodReference); | |
310 } | |
311 | |
312 TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) { | |
313 const int kFrameSizeMsReference = 10; | |
314 const bool kStreamHAsVoiceReference = true; | |
315 const VoiceDetection::Likelihood kLlikelihoodReference = | |
316 VoiceDetection::kLowLikelihood; | |
317 | |
318 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
319 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
320 kLlikelihoodReference); | |
321 } | |
322 | |
323 TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) { | |
324 const int kFrameSizeMsReference = 10; | |
325 const bool kStreamHAsVoiceReference = true; | |
326 const VoiceDetection::Likelihood kLlikelihoodReference = | |
327 VoiceDetection::kLowLikelihood; | |
328 | |
329 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
330 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
331 kLlikelihoodReference); | |
332 } | |
333 | |
334 TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) { | |
335 const int kFrameSizeMsReference = 10; | |
336 const bool kStreamHAsVoiceReference = true; | |
337 const VoiceDetection::Likelihood kLlikelihoodReference = | |
338 VoiceDetection::kLowLikelihood; | |
339 | |
340 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
341 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
342 kLlikelihoodReference); | |
343 } | |
344 | |
345 TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) { | |
346 const int kFrameSizeMsReference = 10; | |
347 const bool kStreamHAsVoiceReference = true; | |
348 const VoiceDetection::Likelihood kLlikelihoodReference = | |
349 VoiceDetection::kLowLikelihood; | |
350 | |
351 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
352 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
353 kLlikelihoodReference); | |
354 } | |
355 | |
356 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) { | |
357 const int kFrameSizeMsReference = 10; | |
358 const bool kStreamHAsVoiceReference = true; | |
359 const VoiceDetection::Likelihood kLlikelihoodReference = | |
360 VoiceDetection::kLowLikelihood; | |
361 | |
362 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh, | |
363 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
364 kLlikelihoodReference); | |
365 } | |
366 | |
367 } // namespace webrtc | |
OLD | NEW |