Index: webrtc/modules/audio_processing/voice_detection_unittest.cc |
diff --git a/webrtc/modules/audio_processing/voice_detection_unittest.cc b/webrtc/modules/audio_processing/voice_detection_unittest.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..4d1b6a920f85416b3f4ca6bd3f1651c966717633 |
--- /dev/null |
+++ b/webrtc/modules/audio_processing/voice_detection_unittest.cc |
@@ -0,0 +1,367 @@ |
+/* |
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
hlundin-webrtc
2016/03/16 15:50:20
2016
peah-webrtc
2016/03/18 05:56:05
Done.
|
+ * |
+ * Use of this source code is governed by a BSD-style license |
+ * that can be found in the LICENSE file in the root of the source |
+ * tree. An additional intellectual property rights grant can be found |
+ * in the file PATENTS. All contributing project authors may |
+ * be found in the AUTHORS file in the root of the source tree. |
+ */ |
+#include <vector> |
+ |
+#include "testing/gtest/include/gtest/gtest.h" |
+#include "webrtc/base/array_view.h" |
+#include "webrtc/base/random.h" |
+#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h" |
+#include "webrtc/modules/audio_processing/audio_buffer.h" |
+#include "webrtc/modules/audio_processing/voice_detection_impl.h" |
+#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" |
+#include "webrtc/modules/audio_processing/test/bitexactness_tools.h" |
+#include "webrtc/test/testsupport/fileutils.h" |
+ |
+namespace webrtc { |
+namespace { |
+ |
+// Relative level of the input signal; selects the gain applied to the test |
+// vectors. Scoped so that the enumerators do not leak into the namespace. |
+enum class TestSignalLevels { kLow, kMedium, kHigh }; |
+ |
+// Predicate-formatter for EXPECT_PRED_FORMAT2 that compares two likelihood |
+// values. Despite its name, it succeeds when |output| equals |reference| and |
+// otherwise fails with a message naming both values. |m_expr| and |n_expr| |
+// are not used. |
+::testing::AssertionResult AssertLikelihoodsNotEqual( |
hlundin-webrtc
2016/03/16 15:50:20
This is a lot of code to compare two enum (integer
peah-webrtc
2016/03/18 05:56:05
Done.
|
+    const char* m_expr, |
+    const char* n_expr, |
+    const VoiceDetection::Likelihood& output, |
+    const VoiceDetection::Likelihood& reference) { |
+  if (output == reference) { |
+    return ::testing::AssertionSuccess(); |
+  } |
+ |
+  // Maps a likelihood value to the name of its enumerator for the report. |
+  auto likelihood_description = |
+      [](VoiceDetection::Likelihood likelihood) -> const char* { |
+    switch (likelihood) { |
+      case VoiceDetection::kVeryLowLikelihood: |
+        return "kVeryLowLikelihood"; |
+      case VoiceDetection::kLowLikelihood: |
+        return "kLowLikelihood"; |
+      case VoiceDetection::kModerateLikelihood: |
+        return "kModerateLikelihood"; |
+      case VoiceDetection::kHighLikelihood: |
+        return "kHighLikelihood"; |
+      default: |
+        RTC_DCHECK(false); |
+        return ""; |
+    } |
+  }; |
+ |
+  // The values differ: report both of them. |
+  return ::testing::AssertionFailure() |
+         << "Actual: " << likelihood_description(output) << "\n" |
+         << "Expected: " << likelihood_description(reference) << "\n"; |
+} |
+ |
+// Runs the voice detector on the frame held in |audio_buffer| and reports the |
+// detector state afterwards through |frame_size_ms|, |stream_has_voice| and |
+// |likelihood|. For sample rates above 16 kHz the buffer is first split into |
+// frequency bands. The bands are not merged back afterwards: the caller in |
+// this file refills the buffer before each frame and never reads it back. |
+void ProcessOneFrame(int sample_rate_hz, |
+                     AudioBuffer* audio_buffer, |
+                     VoiceDetectionImpl* voice_detection, |
+                     int* frame_size_ms, |
+                     bool* stream_has_voice, |
+                     VoiceDetection::Likelihood* likelihood) { |
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
+    audio_buffer->SplitIntoFrequencyBands(); |
+  } |
+ |
+  voice_detection->ProcessCaptureAudio(audio_buffer); |
+ |
hlundin-webrtc
2016/03/16 15:50:20
What is the rationale for merging the bands again?
peah-webrtc
2016/03/18 05:56:05
No point at all in this case, I will remove that.
|
+  // Snapshot the detector outputs produced for this frame. |
+  *frame_size_ms = voice_detection->frame_size_ms(); |
+  *stream_has_voice = voice_detection->stream_has_voice(); |
+  *likelihood = voice_detection->likelihood(); |
+} |
+ |
+// Fills |testvector| with the next block read from |audio_loop|. Each sample |
+// is multiplied by a gain selected by |signal_level| and divided by 32768. |
+// |testvector| is resized to samples_per_channel * num_channels entries. |
+void ConstructTestVector(int samples_per_channel, |
+                         int num_channels, |
+                         test::AudioLoop* audio_loop, |
+                         TestSignalLevels signal_level, |
+                         std::vector<float>* testvector) { |
+  testvector->resize(samples_per_channel * num_channels); |
+  auto block = audio_loop->GetNextBlock(); |
+ |
+  // Stays at zero if |signal_level| is not one of the handled levels (which |
+  // also trips the DCHECK below). |
+  float signal_gain = 0.0f; |
hlundin-webrtc
2016/03/16 15:50:20
Consider:
float signal_gain = 0.1f; // TestSigna
peah-webrtc
2016/03/18 05:56:05
Thanks. This code is now removed.
|
+  if (signal_level == TestSignalLevels::kLow) { |
+    signal_gain = 0.1f; |
+  } else if (signal_level == TestSignalLevels::kMedium) { |
+    signal_gain = 0.5f; |
+  } else if (signal_level == TestSignalLevels::kHigh) { |
+    signal_gain = 1.0f; |
+  } else { |
+    RTC_DCHECK(false); |
+  } |
+ |
+  // The block is read channel after channel and written out with the channels |
+  // interleaved. |
+  std::vector<float>& interleaved = *testvector; |
+  for (int channel = 0; channel < num_channels; ++channel) { |
+    const int channel_offset = channel * samples_per_channel; |
+    for (int sample = 0; sample < samples_per_channel; ++sample) { |
+      interleaved[sample * num_channels + channel] = |
+          signal_gain * block[channel_offset + sample] / 32768.0f; |
+    } |
+  } |
+} |
+ |
+// Initializes |voice_detection| with |sample_rate_hz| capped at 16000 and then |
+// enables it. |
+void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) { |
+  int init_rate_hz = sample_rate_hz; |
+  if (init_rate_hz > 16000) { |
+    init_rate_hz = 16000; |
+  } |
+  voice_detection->Initialize(init_rate_hz); |
+  voice_detection->Enable(true); |
+} |
+ |
+// Returns the resource path of the recording used as input for |
+// |sample_rate_hz|. Only 8000, 16000, 32000 and 48000 are handled; any other |
+// value trips the DCHECK and yields an empty string. |
+std::string GetTestVectorFileName(int sample_rate_hz) { |
hlundin-webrtc
2016/03/16 15:50:20
You can get rid of this function if you follow my
peah-webrtc
2016/03/18 05:56:05
Done.
|
+  const char* resource_name = nullptr; |
+  switch (sample_rate_hz) { |
+    // The 8 kHz case uses the 16 kHz signal as well. Acceptable since the |
+    // test is only a bitexactness test. |
+    case 8000: |
+    case 16000: |
+      resource_name = "audio_coding/speech_mono_16kHz"; |
+      break; |
+    // The 48 kHz case uses the 32 kHz signal as well. Acceptable since the |
+    // test is only a bitexactness test. |
+    case 32000: |
+    case 48000: |
+      resource_name = "audio_coding/speech_mono_32_48kHz"; |
+      break; |
+    default: |
+      RTC_DCHECK(false); |
+      return ""; |
+  } |
+ |
+  return webrtc::test::ResourcePath(resource_name, "pcm"); |
+} |
+ |
+// Feeds |num_frames_to_process| frames of the test recording, scaled according |
+// to |signal_level|, through a newly set up voice detector. The detector |
+// outputs after the last frame are then compared against the supplied |
+// reference values, and any mismatch is reported as a gtest failure. |
+void RunBitexactnessTest(int sample_rate_hz, |
+                         int num_channels, |
+                         int num_frames_to_process, |
+                         TestSignalLevels signal_level, |
+                         int frame_size_ms_reference, |
+                         bool stream_has_voice_reference, |
+                         VoiceDetection::Likelihood likelihood_reference) { |
+  test::AudioLoop audio_loop; |
hlundin-webrtc
2016/03/16 15:50:20
I would actually recommend you use a ResampleInput
peah-webrtc
2016/03/18 05:56:05
Thanks for the suggestion!!! I found some Resource
|
+  // 80 samples per channel at 8 kHz, scaled linearly with the sample rate. |
+  const int samples_per_channel = 80 * sample_rate_hz / 8000; |
+  const StreamConfig stream_config(sample_rate_hz, num_channels, false); |
+  AudioBuffer audio_buffer( |
+      stream_config.num_frames(), stream_config.num_channels(), |
+      stream_config.num_frames(), stream_config.num_channels(), |
+      stream_config.num_frames()); |
+ |
hlundin-webrtc
2016/03/16 15:50:20
Not used.
peah-webrtc
2016/03/18 05:56:05
Done.
|
+  // Failing to load the input must abort the test in every build mode, hence |
+  // a gtest assertion instead of a DCHECK (typically active only in debug |
+  // builds). |
+  const bool success = audio_loop.Init( |
+      GetTestVectorFileName(sample_rate_hz), |
+      num_frames_to_process * samples_per_channel * num_channels, |
+      samples_per_channel * num_channels); |
+  ASSERT_TRUE(success); |
hlundin-webrtc
2016/03/16 15:50:20
This is test code; you might as well CHECK things
peah-webrtc
2016/03/18 05:56:05
Good point!!!
Removed this one but will change oth
|
+ |
+  rtc::CriticalSection crit; |
+  VoiceDetectionImpl voice_detection(&crit); |
+  SetupComponent(sample_rate_hz, &voice_detection); |
+ |
+  std::vector<float> frame_input; |
+  // Initialized so that the comparisons below never read indeterminate |
+  // values, even if no frame gets processed. |
+  int frame_size_ms = 0; |
+  bool stream_has_voice = false; |
+  VoiceDetection::Likelihood likelihood = VoiceDetection::kVeryLowLikelihood; |
+  for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { |
+    ConstructTestVector(samples_per_channel, num_channels, &audio_loop, |
+                        signal_level, &frame_input); |
+ |
+    test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); |
+ |
+    ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection, |
+                    &frame_size_ms, &stream_has_voice, &likelihood); |
+  } |
+ |
+  // Compare the outputs to the references. |
+  EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms, |
hlundin-webrtc
2016/03/16 15:50:20
I think all of these should be changed to regular
peah-webrtc
2016/03/18 05:56:05
Done.
|
+                      frame_size_ms_reference); |
+  EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice, |
+                      stream_has_voice_reference); |
+  EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood, |
+                      likelihood_reference); |
+} |
+ |
+// Number of frames every bit-exactness test below feeds through the detector. |
+constexpr int kNumFramesToProcess = 1000; |
+ |
+} // namespace |
+ |
+// Bit-exactness check: 8 kHz, one channel, low signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
hlundin-webrtc
2016/03/16 15:50:20
kFrameSizeMsReference is always 10; define it once
peah-webrtc
2016/03/18 05:56:05
Done.
|
+  const bool kStreamHasVoiceReference = true; |
hlundin-webrtc
2016/03/16 15:50:20
HAs -> Has
hlundin-webrtc
2016/03/16 15:50:20
Define once before the TESTs.
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:06
Done.
|
+  const VoiceDetection::Likelihood kLikelihoodReference = |
hlundin-webrtc
2016/03/16 15:50:20
... and define it before the TESTs.
hlundin-webrtc
2016/03/16 15:50:20
kLli -> kLi
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:05
Done.
|
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, one channel, low signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 32 kHz, one channel, low signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 48 kHz, one channel, low signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, two channels, low signal level. |
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 8 kHz, one channel, medium signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, one channel, medium signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 32 kHz, one channel, medium signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 48 kHz, one channel, medium signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, two channels, medium signal level. |
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 8 kHz, one channel, high signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, one channel, high signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 32 kHz, one channel, high signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 48 kHz, one channel, high signal level. |
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+// Bit-exactness check: 16 kHz, two channels, high signal level. |
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) { |
+  // Reference values the detector outputs are compared against. |
+  const int kFrameSizeMsReference = 10; |
+  const bool kStreamHasVoiceReference = true; |
+  const VoiceDetection::Likelihood kLikelihoodReference = |
+      VoiceDetection::kLowLikelihood; |
+ |
+  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh, |
+                      kFrameSizeMsReference, kStreamHasVoiceReference, |
+                      kLikelihoodReference); |
+} |
+ |
+} // namespace webrtc |