webrtc/modules/audio_processing/voice_detection_unittest.cc - Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module.

Unified Diff: webrtc/modules/audio_processing/voice_detection_unittest.cc

Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@LevelEstimatorBitExactness_CL

Patch Set: Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/test/bitexactness_tools.cc ('K') | « webrtc/modules/audio_processing/test/bitexactness_tools.cc ('k') | webrtc/modules/modules.gyp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/voice_detection_unittest.cc

diff --git a/webrtc/modules/audio_processing/voice_detection_unittest.cc b/webrtc/modules/audio_processing/voice_detection_unittest.cc

new file mode 100644

index 0000000000000000000000000000000000000000..4d1b6a920f85416b3f4ca6bd3f1651c966717633

--- /dev/null

+++ b/webrtc/modules/audio_processing/voice_detection_unittest.cc

@@ -0,0 +1,367 @@

+/*

hlundin-webrtc 2016/03/16 15:50:20 2016

peah-webrtc 2016/03/18 05:56:05 Done.

+ *

+ * Use of this source code is governed by a BSD-style license

+ * that can be found in the LICENSE file in the root of the source

+ * tree. An additional intellectual property rights grant can be found

+ * in the file PATENTS. All contributing project authors may

+ * be found in the AUTHORS file in the root of the source tree.

+ */

+#include <vector>

+#include "testing/gtest/include/gtest/gtest.h"

+#include "webrtc/base/array_view.h"

+#include "webrtc/base/random.h"

+#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h"

+#include "webrtc/modules/audio_processing/audio_buffer.h"

+#include "webrtc/modules/audio_processing/voice_detection_impl.h"

+#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"

+#include "webrtc/modules/audio_processing/test/bitexactness_tools.h"

+#include "webrtc/test/testsupport/fileutils.h"

+namespace webrtc {

+namespace {

+enum TestSignalLevels { kLow, kMedium, kHigh };

+::testing::AssertionResult AssertLikelihoodsNotEqual(

hlundin-webrtc 2016/03/16 15:50:20 This is a lot of code to compare two enum (integer

peah-webrtc 2016/03/18 05:56:05 Done.

+ const char* m_expr,

+ const char* n_expr,

+ const VoiceDetection::Likelihood& output,

+ const VoiceDetection::Likelihood& reference) {

+ // If the values are deemed not to be similar, return a report of the

+ // difference.

+ if (output != reference) {

+ // Lambda function that produces a string containing the likelihood name.

+ auto likelihood_description = [](VoiceDetection::Likelihood likelihood) {

+ switch (likelihood) {

+ case VoiceDetection::kVeryLowLikelihood:

+ return std::string("kVeryLowLikelihood");

+ break;

+ case VoiceDetection::kLowLikelihood:

+ return std::string("kLowLikelihood");

+ break;

+ case VoiceDetection::kModerateLikelihood:

+ return std::string("kModerateLikelihood");

+ break;

+ case VoiceDetection::kHighLikelihood:

+ return std::string("kHighLikelihood");

+ break;

+ default:

+ RTC_DCHECK(false);

+ return std::string("");

+ }

+ };

+ return ::testing::AssertionFailure()

+ << "Actual: " << likelihood_description(output) << std::endl

+ << "Expected: " << likelihood_description(reference) << std::endl;

+ }

+ return ::testing::AssertionSuccess();

+// Process one frame of data and produce the output.

+void ProcessOneFrame(int sample_rate_hz,

+ AudioBuffer* audio_buffer,

+ VoiceDetectionImpl* voice_detection,

+ int* frame_size_ms,

+ bool* stream_has_voice,

+ VoiceDetection::Likelihood* likelihood) {

+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {

+ audio_buffer->SplitIntoFrequencyBands();

+ }

+ voice_detection->ProcessCaptureAudio(audio_buffer);

+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {

hlundin-webrtc 2016/03/16 15:50:20 What is the rationale for merging the bands again?

peah-webrtc 2016/03/18 05:56:05 No point at all in this case, I will remove that.

+ audio_buffer->MergeFrequencyBands();

+ }

+ *frame_size_ms = voice_detection->frame_size_ms();

+ *stream_has_voice = voice_detection->stream_has_voice();

+ *likelihood = voice_detection->likelihood();

+// Forms a predefined random test vector.

+void ConstructTestVector(int samples_per_channel,

+ int num_channels,

+ test::AudioLoop* audio_loop,

+ TestSignalLevels signal_level,

+ std::vector<float>* testvector) {

+ testvector->resize(samples_per_channel * num_channels);

+ auto input_samples = audio_loop->GetNextBlock();

+ float signal_gain = 0.0f;

hlundin-webrtc 2016/03/16 15:50:20 Consider: float signal_gain = 0.1f; // TestSigna

peah-webrtc 2016/03/18 05:56:05 Thanks. This code is now removed.

+ switch (signal_level) {

+ case TestSignalLevels::kLow:

+ signal_gain = 0.1f;

+ break;

+ case TestSignalLevels::kMedium:

+ signal_gain = 0.5f;

+ break;

+ case TestSignalLevels::kHigh:

+ signal_gain = 1.0f;

+ break;

+ default:

+ RTC_DCHECK(false);

+ }

+ for (int k = 0; k < samples_per_channel; ++k) {

+ for (int j = 0; j < num_channels; ++j) {

+ (*testvector)[k * num_channels + j] =

+ signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f;

+ }

+void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) {

+ voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);

+ voice_detection->Enable(true);

+std::string GetTestVectorFileName(int sample_rate_hz) {

hlundin-webrtc 2016/03/16 15:50:20 You can get rid of this function if you follow my

peah-webrtc 2016/03/18 05:56:05 Done.

+ switch (sample_rate_hz) {

+ case 8000:

+ // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since

+ // the test is only a bitexactness test.

+ return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",

+ "pcm");

+ case 16000:

+ return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",

+ "pcm");

+ case 32000:

+ return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",

+ "pcm");

+ case 48000:

+ // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since

+ // the test is only a bitexactness test.

+ return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",

+ "pcm");

+ default:

+ RTC_DCHECK(false);

+ }

+ return "";

+// Processes a specified amount of frames, verifies the results and reports

+// any errors.

+void RunBitexactnessTest(int sample_rate_hz,

+ int num_channels,

+ int num_frames_to_process,

+ TestSignalLevels signal_level,

+ int frame_size_ms_reference,

+ bool stream_has_voice_reference,

+ VoiceDetection::Likelihood likelihood_reference) {

+ test::AudioLoop audio_loop;

hlundin-webrtc 2016/03/16 15:50:20 I would actually recommend you use a ResampleInput

peah-webrtc 2016/03/18 05:56:05 Thanks for the suggestion!!! I found some Resource

+ int samples_per_channel = 80 * sample_rate_hz / 8000;

+ const StreamConfig stream_config(sample_rate_hz, num_channels, false);

+ AudioBuffer audio_buffer(

+ stream_config.num_frames(), stream_config.num_channels(),

+ stream_config.num_frames());

+ std::string filename;

hlundin-webrtc 2016/03/16 15:50:20 Not used.

peah-webrtc 2016/03/18 05:56:05 Done.

+ bool success = audio_loop.Init(

+ GetTestVectorFileName(sample_rate_hz),

+ num_frames_to_process * samples_per_channel * num_channels,

+ samples_per_channel * num_channels);

+ RTC_DCHECK(success);

hlundin-webrtc 2016/03/16 15:50:20 This is test code; you might as well CHECK things

peah-webrtc 2016/03/18 05:56:05 Good point!!! Removed this one but will change oth

+ rtc::CriticalSection crit;

+ VoiceDetectionImpl voice_detection(&crit);

+ SetupComponent(sample_rate_hz, &voice_detection);

+ std::vector<float> frame_input;

+ int frame_size_ms;

+ bool stream_has_voice;

+ VoiceDetection::Likelihood likelihood;

+ for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {

+ ConstructTestVector(samples_per_channel, num_channels, &audio_loop,

+ signal_level, &frame_input);

+ test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);

+ ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection,

+ &frame_size_ms, &stream_has_voice, &likelihood);

+ }

+ // Compare the outputs to the references.

+ EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms,

hlundin-webrtc 2016/03/16 15:50:20 I think all of these should be changed to regular

peah-webrtc 2016/03/18 05:56:05 Done.

+ frame_size_ms_reference);

+ EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice,

+ stream_has_voice_reference);

+ EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood,

+ likelihood_reference);

+const int kNumFramesToProcess = 1000;

+} // namespace

+TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) {

+ const int kFrameSizeMsReference = 10;

hlundin-webrtc 2016/03/16 15:50:20 kFrameSizeMsReference is always 10; define it once

peah-webrtc 2016/03/18 05:56:05 Done.

+ const bool kStreamHAsVoiceReference = true;

hlundin-webrtc 2016/03/16 15:50:20 HAs -> Has

hlundin-webrtc 2016/03/16 15:50:20 Define once before the TESTs.

peah-webrtc 2016/03/18 05:56:05 Done.

peah-webrtc 2016/03/18 05:56:06 Done.

+ const VoiceDetection::Likelihood kLlikelihoodReference =

hlundin-webrtc 2016/03/16 15:50:20 ... and define it before the TESTs.

hlundin-webrtc 2016/03/16 15:50:20 kLli -> kLi

peah-webrtc 2016/03/18 05:56:05 Done.

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) {

+ const int kFrameSizeMsReference = 10;

+ const bool kStreamHAsVoiceReference = true;

+ const VoiceDetection::Likelihood kLlikelihoodReference =

+ VoiceDetection::kLowLikelihood;

+ RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh,

+ kFrameSizeMsReference, kStreamHAsVoiceReference,

+ kLlikelihoodReference);

+} // namespace webrtc