Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1711)

Unified Diff: webrtc/modules/audio_processing/voice_detection_unittest.cc

Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@LevelEstimatorBitExactness_CL
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/voice_detection_unittest.cc
diff --git a/webrtc/modules/audio_processing/voice_detection_unittest.cc b/webrtc/modules/audio_processing/voice_detection_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4d1b6a920f85416b3f4ca6bd3f1651c966717633
--- /dev/null
+++ b/webrtc/modules/audio_processing/voice_detection_unittest.cc
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
hlundin-webrtc 2016/03/16 15:50:20 2016
peah-webrtc 2016/03/18 05:56:05 Done.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <vector>
+
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/base/array_view.h"
+#include "webrtc/base/random.h"
+#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h"
+#include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/modules/audio_processing/voice_detection_impl.h"
+#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"
+#include "webrtc/modules/audio_processing/test/bitexactness_tools.h"
+#include "webrtc/test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace {
+
+enum TestSignalLevels { kLow, kMedium, kHigh };
+
+::testing::AssertionResult AssertLikelihoodsNotEqual(
hlundin-webrtc 2016/03/16 15:50:20 This is a lot of code to compare two enum (integer
peah-webrtc 2016/03/18 05:56:05 Done.
+ const char* m_expr,
+ const char* n_expr,
+ const VoiceDetection::Likelihood& output,
+ const VoiceDetection::Likelihood& reference) {
+ // If the values are deemed not to be similar, return a report of the
+ // difference.
+ if (output != reference) {
+ // Lambda function that produces a string containing the likelihood name.
+ auto likelihood_description = [](VoiceDetection::Likelihood likelihood) {
+ switch (likelihood) {
+ case VoiceDetection::kVeryLowLikelihood:
+ return std::string("kVeryLowLikelihood");
+ break;
+ case VoiceDetection::kLowLikelihood:
+ return std::string("kLowLikelihood");
+ break;
+ case VoiceDetection::kModerateLikelihood:
+ return std::string("kModerateLikelihood");
+ break;
+ case VoiceDetection::kHighLikelihood:
+ return std::string("kHighLikelihood");
+ break;
+ default:
+ RTC_DCHECK(false);
+ return std::string("");
+ }
+ };
+
+ return ::testing::AssertionFailure()
+ << "Actual: " << likelihood_description(output) << std::endl
+ << "Expected: " << likelihood_description(reference) << std::endl;
+ }
+ return ::testing::AssertionSuccess();
+}
+
+// Process one frame of data and produce the output.
+void ProcessOneFrame(int sample_rate_hz,
+ AudioBuffer* audio_buffer,
+ VoiceDetectionImpl* voice_detection,
+ int* frame_size_ms,
+ bool* stream_has_voice,
+ VoiceDetection::Likelihood* likelihood) {
+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+ audio_buffer->SplitIntoFrequencyBands();
+ }
+
+ voice_detection->ProcessCaptureAudio(audio_buffer);
+
+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
hlundin-webrtc 2016/03/16 15:50:20 What is the rationale for merging the bands again?
peah-webrtc 2016/03/18 05:56:05 No point at all in this case, I will remove that.
+ audio_buffer->MergeFrequencyBands();
+ }
+
+ *frame_size_ms = voice_detection->frame_size_ms();
+ *stream_has_voice = voice_detection->stream_has_voice();
+ *likelihood = voice_detection->likelihood();
+}
+
+// Forms a test vector from the next audio-loop block, scaled per signal_level.
+void ConstructTestVector(int samples_per_channel,
+ int num_channels,
+ test::AudioLoop* audio_loop,
+ TestSignalLevels signal_level,
+ std::vector<float>* testvector) {
+ testvector->resize(samples_per_channel * num_channels);
+ auto input_samples = audio_loop->GetNextBlock();
+
+ float signal_gain = 0.0f;
hlundin-webrtc 2016/03/16 15:50:20 Consider: float signal_gain = 0.1f; // TestSigna
peah-webrtc 2016/03/18 05:56:05 Thanks. This code is now removed.
+ switch (signal_level) {
+ case TestSignalLevels::kLow:
+ signal_gain = 0.1f;
+ break;
+ case TestSignalLevels::kMedium:
+ signal_gain = 0.5f;
+ break;
+ case TestSignalLevels::kHigh:
+ signal_gain = 1.0f;
+ break;
+ default:
+ RTC_DCHECK(false);
+ }
+
+ for (int k = 0; k < samples_per_channel; ++k) {
+ for (int j = 0; j < num_channels; ++j) {
+ (*testvector)[k * num_channels + j] =
+ signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f;
+ }
+ }
+}
+
+void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) {
+ voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);
+ voice_detection->Enable(true);
+}
+
+std::string GetTestVectorFileName(int sample_rate_hz) {
hlundin-webrtc 2016/03/16 15:50:20 You can get rid of this function if you follow my
peah-webrtc 2016/03/18 05:56:05 Done.
+ switch (sample_rate_hz) {
+ case 8000:
+ // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since
+ // the test is only a bitexactness test.
+ return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",
+ "pcm");
+ case 16000:
+ return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",
+ "pcm");
+ case 32000:
+ return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",
+ "pcm");
+ case 48000:
+ // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since
+ // the test is only a bitexactness test.
+ return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",
+ "pcm");
+ default:
+ RTC_DCHECK(false);
+ }
+
+ return "";
+}
+
+// Processes a specified number of frames, verifies the results and reports
+// any errors.
+void RunBitexactnessTest(int sample_rate_hz,
+ int num_channels,
+ int num_frames_to_process,
+ TestSignalLevels signal_level,
+ int frame_size_ms_reference,
+ bool stream_has_voice_reference,
+ VoiceDetection::Likelihood likelihood_reference) {
+ test::AudioLoop audio_loop;
hlundin-webrtc 2016/03/16 15:50:20 I would actually recommend you use a ResampleInput
peah-webrtc 2016/03/18 05:56:05 Thanks for the suggestion!!! I found some Resource
+ int samples_per_channel = 80 * sample_rate_hz / 8000;
+ const StreamConfig stream_config(sample_rate_hz, num_channels, false);
+ AudioBuffer audio_buffer(
+ stream_config.num_frames(), stream_config.num_channels(),
+ stream_config.num_frames(), stream_config.num_channels(),
+ stream_config.num_frames());
+
+ std::string filename;
hlundin-webrtc 2016/03/16 15:50:20 Not used.
peah-webrtc 2016/03/18 05:56:05 Done.
+ bool success = audio_loop.Init(
+ GetTestVectorFileName(sample_rate_hz),
+ num_frames_to_process * samples_per_channel * num_channels,
+ samples_per_channel * num_channels);
+ RTC_DCHECK(success);
hlundin-webrtc 2016/03/16 15:50:20 This is test code; you might as well CHECK things
peah-webrtc 2016/03/18 05:56:05 Good point!!! Removed this one but will change oth
+
+ rtc::CriticalSection crit;
+ VoiceDetectionImpl voice_detection(&crit);
+ SetupComponent(sample_rate_hz, &voice_detection);
+
+ std::vector<float> frame_input;
+ int frame_size_ms;
+ bool stream_has_voice;
+ VoiceDetection::Likelihood likelihood;
+ for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {
+ ConstructTestVector(samples_per_channel, num_channels, &audio_loop,
+ signal_level, &frame_input);
+
+ test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);
+
+ ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection,
+ &frame_size_ms, &stream_has_voice, &likelihood);
+ }
+
+ // Compare the outputs to the references.
+ EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms,
hlundin-webrtc 2016/03/16 15:50:20 I think all of these should be changed to regular
peah-webrtc 2016/03/18 05:56:05 Done.
+ frame_size_ms_reference);
+ EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice,
+ stream_has_voice_reference);
+ EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood,
+ likelihood_reference);
+}
+
+const int kNumFramesToProcess = 1000;
+
+} // namespace
+
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) {
+ const int kFrameSizeMsReference = 10;
hlundin-webrtc 2016/03/16 15:50:20 kFrameSizeMsReference is always 10; define it once
peah-webrtc 2016/03/18 05:56:05 Done.
+ const bool kStreamHAsVoiceReference = true;
hlundin-webrtc 2016/03/16 15:50:20 HAs -> Has
hlundin-webrtc 2016/03/16 15:50:20 Define once before the TESTs.
peah-webrtc 2016/03/18 05:56:05 Done.
peah-webrtc 2016/03/18 05:56:06 Done.
+ const VoiceDetection::Likelihood kLlikelihoodReference =
hlundin-webrtc 2016/03/16 15:50:20 ... and define it before the TESTs.
hlundin-webrtc 2016/03/16 15:50:20 kLli -> kLi
peah-webrtc 2016/03/18 05:56:05 Done.
peah-webrtc 2016/03/18 05:56:05 Done.
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) {
+ const int kFrameSizeMsReference = 10;
+ const bool kStreamHAsVoiceReference = true;
+ const VoiceDetection::Likelihood kLlikelihoodReference =
+ VoiceDetection::kLowLikelihood;
+
+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh,
+ kFrameSizeMsReference, kStreamHAsVoiceReference,
+ kLlikelihoodReference);
+}
+
+} // namespace webrtc

Powered by Google App Engine
This is Rietveld 408576698