Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(35)

Side by Side Diff: webrtc/modules/audio_processing/voice_detection_unittest.cc

Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@LevelEstimatorBitExactness_CL
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
hlundin-webrtc 2016/03/16 15:50:20 2016
peah-webrtc 2016/03/18 05:56:05 Done.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include <vector>
11
12 #include "testing/gtest/include/gtest/gtest.h"
13 #include "webrtc/base/array_view.h"
14 #include "webrtc/base/random.h"
15 #include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h"
16 #include "webrtc/modules/audio_processing/audio_buffer.h"
17 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
18 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"
19 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h"
20 #include "webrtc/test/testsupport/fileutils.h"
21
22 namespace webrtc {
23 namespace {
24
25 enum TestSignalLevels { kLow, kMedium, kHigh };
26
27 ::testing::AssertionResult AssertLikelihoodsNotEqual(
hlundin-webrtc 2016/03/16 15:50:20 This is a lot of code to compare two enum (integer
peah-webrtc 2016/03/18 05:56:05 Done.
28 const char* m_expr,
29 const char* n_expr,
30 const VoiceDetection::Likelihood& output,
31 const VoiceDetection::Likelihood& reference) {
32 // If the two likelihood values differ, return a report of the
33 // difference.
34 if (output != reference) {
35 // Lambda function that produces a string containing the likelihood name.
36 auto likelihood_description = [](VoiceDetection::Likelihood likelihood) {
37 switch (likelihood) {
38 case VoiceDetection::kVeryLowLikelihood:
39 return std::string("kVeryLowLikelihood");
40 break;
41 case VoiceDetection::kLowLikelihood:
42 return std::string("kLowLikelihood");
43 break;
44 case VoiceDetection::kModerateLikelihood:
45 return std::string("kModerateLikelihood");
46 break;
47 case VoiceDetection::kHighLikelihood:
48 return std::string("kHighLikelihood");
49 break;
50 default:
51 RTC_DCHECK(false);
52 return std::string("");
53 }
54 };
55
56 return ::testing::AssertionFailure()
57 << "Actual: " << likelihood_description(output) << std::endl
58 << "Expected: " << likelihood_description(reference) << std::endl;
59 }
60 return ::testing::AssertionSuccess();
61 }
62
63 // Process one frame of data and produce the output.
64 void ProcessOneFrame(int sample_rate_hz,
65 AudioBuffer* audio_buffer,
66 VoiceDetectionImpl* voice_detection,
67 int* frame_size_ms,
68 bool* stream_has_voice,
69 VoiceDetection::Likelihood* likelihood) {
70 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
71 audio_buffer->SplitIntoFrequencyBands();
72 }
73
74 voice_detection->ProcessCaptureAudio(audio_buffer);
75
76 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
hlundin-webrtc 2016/03/16 15:50:20 What is the rationale for merging the bands again?
peah-webrtc 2016/03/18 05:56:05 No point at all in this case, I will remove that.
77 audio_buffer->MergeFrequencyBands();
78 }
79
80 *frame_size_ms = voice_detection->frame_size_ms();
81 *stream_has_voice = voice_detection->stream_has_voice();
82 *likelihood = voice_detection->likelihood();
83 }
84
85 // Forms a gain-scaled test vector from the next block of the audio loop.
86 void ConstructTestVector(int samples_per_channel,
87 int num_channels,
88 test::AudioLoop* audio_loop,
89 TestSignalLevels signal_level,
90 std::vector<float>* testvector) {
91 testvector->resize(samples_per_channel * num_channels);
92 auto input_samples = audio_loop->GetNextBlock();
93
94 float signal_gain = 0.0f;
hlundin-webrtc 2016/03/16 15:50:20 Consider: float signal_gain = 0.1f; // TestSigna
peah-webrtc 2016/03/18 05:56:05 Thanks. This code is now removed.
95 switch (signal_level) {
96 case TestSignalLevels::kLow:
97 signal_gain = 0.1f;
98 break;
99 case TestSignalLevels::kMedium:
100 signal_gain = 0.5f;
101 break;
102 case TestSignalLevels::kHigh:
103 signal_gain = 1.0f;
104 break;
105 default:
106 RTC_DCHECK(false);
107 }
108
109 for (int k = 0; k < samples_per_channel; ++k) {
110 for (int j = 0; j < num_channels; ++j) {
111 (*testvector)[k * num_channels + j] =
112 signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f;
113 }
114 }
115 }
116
117 void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) {
118 voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);
119 voice_detection->Enable(true);
120 }
121
122 std::string GetTestVectorFileName(int sample_rate_hz) {
hlundin-webrtc 2016/03/16 15:50:20 You can get rid of this function if you follow my
peah-webrtc 2016/03/18 05:56:05 Done.
123 switch (sample_rate_hz) {
124 case 8000:
125 // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since
126 // the test is only a bitexactness test.
127 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",
128 "pcm");
129 case 16000:
130 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",
131 "pcm");
132 case 32000:
133 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",
134 "pcm");
135 case 48000:
136 // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since
137 // the test is only a bitexactness test.
138 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",
139 "pcm");
140 default:
141 RTC_DCHECK(false);
142 }
143
144 return "";
145 }
146
147 // Processes a specified number of frames, verifies the results and reports
148 // any errors.
149 void RunBitexactnessTest(int sample_rate_hz,
150 int num_channels,
151 int num_frames_to_process,
152 TestSignalLevels signal_level,
153 int frame_size_ms_reference,
154 bool stream_has_voice_reference,
155 VoiceDetection::Likelihood likelihood_reference) {
156 test::AudioLoop audio_loop;
hlundin-webrtc 2016/03/16 15:50:20 I would actually recommend you use a ResampleInput
peah-webrtc 2016/03/18 05:56:05 Thanks for the suggestion!!! I found some Resource
157 int samples_per_channel = 80 * sample_rate_hz / 8000;
158 const StreamConfig stream_config(sample_rate_hz, num_channels, false);
159 AudioBuffer audio_buffer(
160 stream_config.num_frames(), stream_config.num_channels(),
161 stream_config.num_frames(), stream_config.num_channels(),
162 stream_config.num_frames());
163
164 std::string filename;
hlundin-webrtc 2016/03/16 15:50:20 Not used.
peah-webrtc 2016/03/18 05:56:05 Done.
165 bool success = audio_loop.Init(
166 GetTestVectorFileName(sample_rate_hz),
167 num_frames_to_process * samples_per_channel * num_channels,
168 samples_per_channel * num_channels);
169 RTC_DCHECK(success);
hlundin-webrtc 2016/03/16 15:50:20 This is test code; you might as well CHECK things
peah-webrtc 2016/03/18 05:56:05 Good point!!! Removed this one but will change oth
170
171 rtc::CriticalSection crit;
172 VoiceDetectionImpl voice_detection(&crit);
173 SetupComponent(sample_rate_hz, &voice_detection);
174
175 std::vector<float> frame_input;
176 int frame_size_ms;
177 bool stream_has_voice;
178 VoiceDetection::Likelihood likelihood;
179 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {
180 ConstructTestVector(samples_per_channel, num_channels, &audio_loop,
181 signal_level, &frame_input);
182
183 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);
184
185 ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection,
186 &frame_size_ms, &stream_has_voice, &likelihood);
187 }
188
189 // Compare the outputs to the references.
190 EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms,
hlundin-webrtc 2016/03/16 15:50:20 I think all of these should be changed to regular
peah-webrtc 2016/03/18 05:56:05 Done.
191 frame_size_ms_reference);
192 EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice,
193 stream_has_voice_reference);
194 EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood,
195 likelihood_reference);
196 }
197
198 const int kNumFramesToProcess = 1000;
199
200 } // namespace
201
202 TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) {
203 const int kFrameSizeMsReference = 10;
hlundin-webrtc 2016/03/16 15:50:20 kFrameSizeMsReference is always 10; define it once
peah-webrtc 2016/03/18 05:56:05 Done.
204 const bool kStreamHAsVoiceReference = true;
hlundin-webrtc 2016/03/16 15:50:20 HAs -> Has
hlundin-webrtc 2016/03/16 15:50:20 Define once before the TESTs.
peah-webrtc 2016/03/18 05:56:05 Done.
peah-webrtc 2016/03/18 05:56:06 Done.
205 const VoiceDetection::Likelihood kLlikelihoodReference =
hlundin-webrtc 2016/03/16 15:50:20 ... and define it before the TESTs.
hlundin-webrtc 2016/03/16 15:50:20 kLli -> kLi
peah-webrtc 2016/03/18 05:56:05 Done.
peah-webrtc 2016/03/18 05:56:05 Done.
206 VoiceDetection::kLowLikelihood;
207
208 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
209 kFrameSizeMsReference, kStreamHAsVoiceReference,
210 kLlikelihoodReference);
211 }
212
213 TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) {
214 const int kFrameSizeMsReference = 10;
215 const bool kStreamHAsVoiceReference = true;
216 const VoiceDetection::Likelihood kLlikelihoodReference =
217 VoiceDetection::kLowLikelihood;
218
219 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
220 kFrameSizeMsReference, kStreamHAsVoiceReference,
221 kLlikelihoodReference);
222 }
223
224 TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) {
225 const int kFrameSizeMsReference = 10;
226 const bool kStreamHAsVoiceReference = true;
227 const VoiceDetection::Likelihood kLlikelihoodReference =
228 VoiceDetection::kLowLikelihood;
229
230 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
231 kFrameSizeMsReference, kStreamHAsVoiceReference,
232 kLlikelihoodReference);
233 }
234
235 TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) {
236 const int kFrameSizeMsReference = 10;
237 const bool kStreamHAsVoiceReference = true;
238 const VoiceDetection::Likelihood kLlikelihoodReference =
239 VoiceDetection::kLowLikelihood;
240
241 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
242 kFrameSizeMsReference, kStreamHAsVoiceReference,
243 kLlikelihoodReference);
244 }
245
246 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) {
247 const int kFrameSizeMsReference = 10;
248 const bool kStreamHAsVoiceReference = true;
249 const VoiceDetection::Likelihood kLlikelihoodReference =
250 VoiceDetection::kLowLikelihood;
251
252 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow,
253 kFrameSizeMsReference, kStreamHAsVoiceReference,
254 kLlikelihoodReference);
255 }
256
257 TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) {
258 const int kFrameSizeMsReference = 10;
259 const bool kStreamHAsVoiceReference = true;
260 const VoiceDetection::Likelihood kLlikelihoodReference =
261 VoiceDetection::kLowLikelihood;
262
263 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
264 kFrameSizeMsReference, kStreamHAsVoiceReference,
265 kLlikelihoodReference);
266 }
267
268 TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) {
269 const int kFrameSizeMsReference = 10;
270 const bool kStreamHAsVoiceReference = true;
271 const VoiceDetection::Likelihood kLlikelihoodReference =
272 VoiceDetection::kLowLikelihood;
273
274 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
275 kFrameSizeMsReference, kStreamHAsVoiceReference,
276 kLlikelihoodReference);
277 }
278
279 TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) {
280 const int kFrameSizeMsReference = 10;
281 const bool kStreamHAsVoiceReference = true;
282 const VoiceDetection::Likelihood kLlikelihoodReference =
283 VoiceDetection::kLowLikelihood;
284
285 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
286 kFrameSizeMsReference, kStreamHAsVoiceReference,
287 kLlikelihoodReference);
288 }
289
290 TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) {
291 const int kFrameSizeMsReference = 10;
292 const bool kStreamHAsVoiceReference = true;
293 const VoiceDetection::Likelihood kLlikelihoodReference =
294 VoiceDetection::kLowLikelihood;
295
296 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
297 kFrameSizeMsReference, kStreamHAsVoiceReference,
298 kLlikelihoodReference);
299 }
300
301 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) {
302 const int kFrameSizeMsReference = 10;
303 const bool kStreamHAsVoiceReference = true;
304 const VoiceDetection::Likelihood kLlikelihoodReference =
305 VoiceDetection::kLowLikelihood;
306
307 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium,
308 kFrameSizeMsReference, kStreamHAsVoiceReference,
309 kLlikelihoodReference);
310 }
311
312 TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) {
313 const int kFrameSizeMsReference = 10;
314 const bool kStreamHAsVoiceReference = true;
315 const VoiceDetection::Likelihood kLlikelihoodReference =
316 VoiceDetection::kLowLikelihood;
317
318 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
319 kFrameSizeMsReference, kStreamHAsVoiceReference,
320 kLlikelihoodReference);
321 }
322
323 TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) {
324 const int kFrameSizeMsReference = 10;
325 const bool kStreamHAsVoiceReference = true;
326 const VoiceDetection::Likelihood kLlikelihoodReference =
327 VoiceDetection::kLowLikelihood;
328
329 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
330 kFrameSizeMsReference, kStreamHAsVoiceReference,
331 kLlikelihoodReference);
332 }
333
334 TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) {
335 const int kFrameSizeMsReference = 10;
336 const bool kStreamHAsVoiceReference = true;
337 const VoiceDetection::Likelihood kLlikelihoodReference =
338 VoiceDetection::kLowLikelihood;
339
340 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
341 kFrameSizeMsReference, kStreamHAsVoiceReference,
342 kLlikelihoodReference);
343 }
344
345 TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) {
346 const int kFrameSizeMsReference = 10;
347 const bool kStreamHAsVoiceReference = true;
348 const VoiceDetection::Likelihood kLlikelihoodReference =
349 VoiceDetection::kLowLikelihood;
350
351 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
352 kFrameSizeMsReference, kStreamHAsVoiceReference,
353 kLlikelihoodReference);
354 }
355
356 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) {
357 const int kFrameSizeMsReference = 10;
358 const bool kStreamHAsVoiceReference = true;
359 const VoiceDetection::Likelihood kLlikelihoodReference =
360 VoiceDetection::kLowLikelihood;
361
362 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh,
363 kFrameSizeMsReference, kStreamHAsVoiceReference,
364 kLlikelihoodReference);
365 }
366
367 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698