Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
|
hlundin-webrtc
2016/03/16 15:50:20
2016
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "testing/gtest/include/gtest/gtest.h" | |
| 13 #include "webrtc/base/array_view.h" | |
| 14 #include "webrtc/base/random.h" | |
| 15 #include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h" | |
| 16 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
| 17 #include "webrtc/modules/audio_processing/voice_detection_impl.h" | |
| 18 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" | |
| 19 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" | |
| 20 #include "webrtc/test/testsupport/fileutils.h" | |
| 21 | |
| 22 namespace webrtc { | |
| 23 namespace { | |
| 24 | |
| 25 enum TestSignalLevels { kLow, kMedium, kHigh }; | |
| 26 | |
| 27 ::testing::AssertionResult AssertLikelihoodsNotEqual( | |
|
hlundin-webrtc
2016/03/16 15:50:20
This is a lot of code to compare two enum (integer
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 28 const char* m_expr, | |
| 29 const char* n_expr, | |
| 30 const VoiceDetection::Likelihood& output, | |
| 31 const VoiceDetection::Likelihood& reference) { | |
| 32 // If the values are deemed not to be similar, return a report of the | |
| 33 // difference. | |
| 34 if (output != reference) { | |
| 35 // Lambda function that produces a string containing the likelihood name. | |
| 36 auto likelihood_description = [](VoiceDetection::Likelihood likelihood) { | |
| 37 switch (likelihood) { | |
| 38 case VoiceDetection::kVeryLowLikelihood: | |
| 39 return std::string("kVeryLowLikelihood"); | |
| 40 break; | |
| 41 case VoiceDetection::kLowLikelihood: | |
| 42 return std::string("kLowLikelihood"); | |
| 43 break; | |
| 44 case VoiceDetection::kModerateLikelihood: | |
| 45 return std::string("kModerateLikelihood"); | |
| 46 break; | |
| 47 case VoiceDetection::kHighLikelihood: | |
| 48 return std::string("kHighLikelihood"); | |
| 49 break; | |
| 50 default: | |
| 51 RTC_DCHECK(false); | |
| 52 return std::string(""); | |
| 53 } | |
| 54 }; | |
| 55 | |
| 56 return ::testing::AssertionFailure() | |
| 57 << "Actual: " << likelihood_description(output) << std::endl | |
| 58 << "Expected: " << likelihood_description(reference) << std::endl; | |
| 59 } | |
| 60 return ::testing::AssertionSuccess(); | |
| 61 } | |
| 62 | |
| 63 // Process one frame of data and produce the output. | |
| 64 void ProcessOneFrame(int sample_rate_hz, | |
| 65 AudioBuffer* audio_buffer, | |
| 66 VoiceDetectionImpl* voice_detection, | |
| 67 int* frame_size_ms, | |
| 68 bool* stream_has_voice, | |
| 69 VoiceDetection::Likelihood* likelihood) { | |
| 70 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
| 71 audio_buffer->SplitIntoFrequencyBands(); | |
| 72 } | |
| 73 | |
| 74 voice_detection->ProcessCaptureAudio(audio_buffer); | |
| 75 | |
| 76 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
|
hlundin-webrtc
2016/03/16 15:50:20
What is the rationale for merging the bands again?
peah-webrtc
2016/03/18 05:56:05
No point at all in this case, I will remove that.
| |
| 77 audio_buffer->MergeFrequencyBands(); | |
| 78 } | |
| 79 | |
| 80 *frame_size_ms = voice_detection->frame_size_ms(); | |
| 81 *stream_has_voice = voice_detection->stream_has_voice(); | |
| 82 *likelihood = voice_detection->likelihood(); | |
| 83 } | |
| 84 | |
| 85 // Forms a predefined random test vector. | |
| 86 void ConstructTestVector(int samples_per_channel, | |
| 87 int num_channels, | |
| 88 test::AudioLoop* audio_loop, | |
| 89 TestSignalLevels signal_level, | |
| 90 std::vector<float>* testvector) { | |
| 91 testvector->resize(samples_per_channel * num_channels); | |
| 92 auto input_samples = audio_loop->GetNextBlock(); | |
| 93 | |
| 94 float signal_gain = 0.0f; | |
|
hlundin-webrtc
2016/03/16 15:50:20
Consider:
float signal_gain = 0.1f; // TestSigna
peah-webrtc
2016/03/18 05:56:05
Thanks. This code is now removed.
| |
| 95 switch (signal_level) { | |
| 96 case TestSignalLevels::kLow: | |
| 97 signal_gain = 0.1f; | |
| 98 break; | |
| 99 case TestSignalLevels::kMedium: | |
| 100 signal_gain = 0.5f; | |
| 101 break; | |
| 102 case TestSignalLevels::kHigh: | |
| 103 signal_gain = 1.0f; | |
| 104 break; | |
| 105 default: | |
| 106 RTC_DCHECK(false); | |
| 107 } | |
| 108 | |
| 109 for (int k = 0; k < samples_per_channel; ++k) { | |
| 110 for (int j = 0; j < num_channels; ++j) { | |
| 111 (*testvector)[k * num_channels + j] = | |
| 112 signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f; | |
| 113 } | |
| 114 } | |
| 115 } | |
| 116 | |
| 117 void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) { | |
| 118 voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz); | |
| 119 voice_detection->Enable(true); | |
| 120 } | |
| 121 | |
| 122 std::string GetTestVectorFileName(int sample_rate_hz) { | |
|
hlundin-webrtc
2016/03/16 15:50:20
You can get rid of this function if you follow my
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 123 switch (sample_rate_hz) { | |
| 124 case 8000: | |
| 125 // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since | |
| 126 // the test is only a bitexactness test. | |
| 127 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", | |
| 128 "pcm"); | |
| 129 case 16000: | |
| 130 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", | |
| 131 "pcm"); | |
| 132 case 32000: | |
| 133 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", | |
| 134 "pcm"); | |
| 135 case 48000: | |
| 136 // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since | |
| 137 // the test is only a bitexactness test. | |
| 138 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", | |
| 139 "pcm"); | |
| 140 default: | |
| 141 RTC_DCHECK(false); | |
| 142 } | |
| 143 | |
| 144 return ""; | |
| 145 } | |
| 146 | |
| 147 // Processes a specified number of frames, verifies the results and reports | |
| 148 // any errors. | |
| 149 void RunBitexactnessTest(int sample_rate_hz, | |
| 150 int num_channels, | |
| 151 int num_frames_to_process, | |
| 152 TestSignalLevels signal_level, | |
| 153 int frame_size_ms_reference, | |
| 154 bool stream_has_voice_reference, | |
| 155 VoiceDetection::Likelihood likelihood_reference) { | |
| 156 test::AudioLoop audio_loop; | |
|
hlundin-webrtc
2016/03/16 15:50:20
I would actually recommend you use a ResampleInput
peah-webrtc
2016/03/18 05:56:05
Thanks for the suggestion!!! I found some Resource
| |
| 157 int samples_per_channel = 80 * sample_rate_hz / 8000; | |
| 158 const StreamConfig stream_config(sample_rate_hz, num_channels, false); | |
| 159 AudioBuffer audio_buffer( | |
| 160 stream_config.num_frames(), stream_config.num_channels(), | |
| 161 stream_config.num_frames(), stream_config.num_channels(), | |
| 162 stream_config.num_frames()); | |
| 163 | |
| 164 std::string filename; | |
|
hlundin-webrtc
2016/03/16 15:50:20
Not used.
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 165 bool success = audio_loop.Init( | |
| 166 GetTestVectorFileName(sample_rate_hz), | |
| 167 num_frames_to_process * samples_per_channel * num_channels, | |
| 168 samples_per_channel * num_channels); | |
| 169 RTC_DCHECK(success); | |
|
hlundin-webrtc
2016/03/16 15:50:20
This is test code; you might as well CHECK things
peah-webrtc
2016/03/18 05:56:05
Good point!!!
Removed this one but will change oth
| |
| 170 | |
| 171 rtc::CriticalSection crit; | |
| 172 VoiceDetectionImpl voice_detection(&crit); | |
| 173 SetupComponent(sample_rate_hz, &voice_detection); | |
| 174 | |
| 175 std::vector<float> frame_input; | |
| 176 int frame_size_ms; | |
| 177 bool stream_has_voice; | |
| 178 VoiceDetection::Likelihood likelihood; | |
| 179 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { | |
| 180 ConstructTestVector(samples_per_channel, num_channels, &audio_loop, | |
| 181 signal_level, &frame_input); | |
| 182 | |
| 183 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); | |
| 184 | |
| 185 ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection, | |
| 186 &frame_size_ms, &stream_has_voice, &likelihood); | |
| 187 } | |
| 188 | |
| 189 // Compare the outputs to the references. | |
| 190 EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms, | |
|
hlundin-webrtc
2016/03/16 15:50:20
I think all of these should be changed to regular
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 191 frame_size_ms_reference); | |
| 192 EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice, | |
| 193 stream_has_voice_reference); | |
| 194 EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood, | |
| 195 likelihood_reference); | |
| 196 } | |
| 197 | |
| 198 const int kNumFramesToProcess = 1000; | |
| 199 | |
| 200 } // namespace | |
| 201 | |
| 202 TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) { | |
| 203 const int kFrameSizeMsReference = 10; | |
|
hlundin-webrtc
2016/03/16 15:50:20
kFrameSizeMsReference is always 10; define it once
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 204 const bool kStreamHAsVoiceReference = true; | |
|
hlundin-webrtc
2016/03/16 15:50:20
HAs -> Has
hlundin-webrtc
2016/03/16 15:50:20
Define once before the TESTs.
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:06
Done.
| |
| 205 const VoiceDetection::Likelihood kLlikelihoodReference = | |
|
hlundin-webrtc
2016/03/16 15:50:20
... and define it before the TESTs.
hlundin-webrtc
2016/03/16 15:50:20
kLli -> kLi
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:05
Done.
| |
| 206 VoiceDetection::kLowLikelihood; | |
| 207 | |
| 208 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
| 209 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 210 kLlikelihoodReference); | |
| 211 } | |
| 212 | |
| 213 TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) { | |
| 214 const int kFrameSizeMsReference = 10; | |
| 215 const bool kStreamHAsVoiceReference = true; | |
| 216 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 217 VoiceDetection::kLowLikelihood; | |
| 218 | |
| 219 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
| 220 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 221 kLlikelihoodReference); | |
| 222 } | |
| 223 | |
| 224 TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) { | |
| 225 const int kFrameSizeMsReference = 10; | |
| 226 const bool kStreamHAsVoiceReference = true; | |
| 227 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 228 VoiceDetection::kLowLikelihood; | |
| 229 | |
| 230 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
| 231 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 232 kLlikelihoodReference); | |
| 233 } | |
| 234 | |
| 235 TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) { | |
| 236 const int kFrameSizeMsReference = 10; | |
| 237 const bool kStreamHAsVoiceReference = true; | |
| 238 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 239 VoiceDetection::kLowLikelihood; | |
| 240 | |
| 241 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow, | |
| 242 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 243 kLlikelihoodReference); | |
| 244 } | |
| 245 | |
| 246 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) { | |
| 247 const int kFrameSizeMsReference = 10; | |
| 248 const bool kStreamHAsVoiceReference = true; | |
| 249 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 250 VoiceDetection::kLowLikelihood; | |
| 251 | |
| 252 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow, | |
| 253 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 254 kLlikelihoodReference); | |
| 255 } | |
| 256 | |
| 257 TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) { | |
| 258 const int kFrameSizeMsReference = 10; | |
| 259 const bool kStreamHAsVoiceReference = true; | |
| 260 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 261 VoiceDetection::kLowLikelihood; | |
| 262 | |
| 263 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
| 264 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 265 kLlikelihoodReference); | |
| 266 } | |
| 267 | |
| 268 TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) { | |
| 269 const int kFrameSizeMsReference = 10; | |
| 270 const bool kStreamHAsVoiceReference = true; | |
| 271 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 272 VoiceDetection::kLowLikelihood; | |
| 273 | |
| 274 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
| 275 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 276 kLlikelihoodReference); | |
| 277 } | |
| 278 | |
| 279 TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) { | |
| 280 const int kFrameSizeMsReference = 10; | |
| 281 const bool kStreamHAsVoiceReference = true; | |
| 282 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 283 VoiceDetection::kLowLikelihood; | |
| 284 | |
| 285 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
| 286 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 287 kLlikelihoodReference); | |
| 288 } | |
| 289 | |
| 290 TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) { | |
| 291 const int kFrameSizeMsReference = 10; | |
| 292 const bool kStreamHAsVoiceReference = true; | |
| 293 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 294 VoiceDetection::kLowLikelihood; | |
| 295 | |
| 296 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, | |
| 297 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 298 kLlikelihoodReference); | |
| 299 } | |
| 300 | |
| 301 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) { | |
| 302 const int kFrameSizeMsReference = 10; | |
| 303 const bool kStreamHAsVoiceReference = true; | |
| 304 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 305 VoiceDetection::kLowLikelihood; | |
| 306 | |
| 307 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium, | |
| 308 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 309 kLlikelihoodReference); | |
| 310 } | |
| 311 | |
| 312 TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) { | |
| 313 const int kFrameSizeMsReference = 10; | |
| 314 const bool kStreamHAsVoiceReference = true; | |
| 315 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 316 VoiceDetection::kLowLikelihood; | |
| 317 | |
| 318 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
| 319 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 320 kLlikelihoodReference); | |
| 321 } | |
| 322 | |
| 323 TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) { | |
| 324 const int kFrameSizeMsReference = 10; | |
| 325 const bool kStreamHAsVoiceReference = true; | |
| 326 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 327 VoiceDetection::kLowLikelihood; | |
| 328 | |
| 329 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
| 330 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 331 kLlikelihoodReference); | |
| 332 } | |
| 333 | |
| 334 TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) { | |
| 335 const int kFrameSizeMsReference = 10; | |
| 336 const bool kStreamHAsVoiceReference = true; | |
| 337 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 338 VoiceDetection::kLowLikelihood; | |
| 339 | |
| 340 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
| 341 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 342 kLlikelihoodReference); | |
| 343 } | |
| 344 | |
| 345 TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) { | |
| 346 const int kFrameSizeMsReference = 10; | |
| 347 const bool kStreamHAsVoiceReference = true; | |
| 348 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 349 VoiceDetection::kLowLikelihood; | |
| 350 | |
| 351 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, | |
| 352 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 353 kLlikelihoodReference); | |
| 354 } | |
| 355 | |
| 356 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) { | |
| 357 const int kFrameSizeMsReference = 10; | |
| 358 const bool kStreamHAsVoiceReference = true; | |
| 359 const VoiceDetection::Likelihood kLlikelihoodReference = | |
| 360 VoiceDetection::kLowLikelihood; | |
| 361 | |
| 362 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh, | |
| 363 kFrameSizeMsReference, kStreamHAsVoiceReference, | |
| 364 kLlikelihoodReference); | |
| 365 } | |
| 366 | |
| 367 } // namespace webrtc | |
| OLD | NEW |