Chromium Code Reviews| Index: webrtc/modules/audio_processing/voice_detection_unittest.cc |
| diff --git a/webrtc/modules/audio_processing/voice_detection_unittest.cc b/webrtc/modules/audio_processing/voice_detection_unittest.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..4d1b6a920f85416b3f4ca6bd3f1651c966717633 |
| --- /dev/null |
| +++ b/webrtc/modules/audio_processing/voice_detection_unittest.cc |
| @@ -0,0 +1,367 @@ |
| +/* |
| + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
|
hlundin-webrtc
2016/03/16 15:50:20
2016
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| +#include <vector> |
| + |
| +#include "testing/gtest/include/gtest/gtest.h" |
| +#include "webrtc/base/array_view.h" |
| +#include "webrtc/base/random.h" |
| +#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h" |
| +#include "webrtc/modules/audio_processing/audio_buffer.h" |
| +#include "webrtc/modules/audio_processing/voice_detection_impl.h" |
| +#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" |
| +#include "webrtc/modules/audio_processing/test/bitexactness_tools.h" |
| +#include "webrtc/test/testsupport/fileutils.h" |
| + |
| +namespace webrtc { |
| +namespace { |
| + |
| +// Signal level presets selectable for the generated test input. |
| +enum TestSignalLevels { |
| +  kLow, |
| +  kMedium, |
| +  kHigh |
| +}; |
| + |
| +::testing::AssertionResult AssertLikelihoodsNotEqual( |
|
hlundin-webrtc
2016/03/16 15:50:20
This is a lot of code to compare two enum (integer
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + const char* m_expr, |
| + const char* n_expr, |
| + const VoiceDetection::Likelihood& output, |
| + const VoiceDetection::Likelihood& reference) { |
| + // Despite the NotEqual name, equal values succeed; a mismatch returns a |
| + // failure report that names both likelihoods. |
| + if (output != reference) { |
| + // Maps a likelihood enumerator to its name; unlisted values give an empty string. |
| + auto likelihood_description = [](VoiceDetection::Likelihood likelihood) { |
| + switch (likelihood) { |
| + case VoiceDetection::kVeryLowLikelihood: |
| + return std::string("kVeryLowLikelihood"); |
| + break; |
| + case VoiceDetection::kLowLikelihood: |
| + return std::string("kLowLikelihood"); |
| + break; |
| + case VoiceDetection::kModerateLikelihood: |
| + return std::string("kModerateLikelihood"); |
| + break; |
| + case VoiceDetection::kHighLikelihood: |
| + return std::string("kHighLikelihood"); |
| + break; |
| + default: |
| + RTC_DCHECK(false); |
| + return std::string(""); |
| + } |
| + }; |
| + |
| + return ::testing::AssertionFailure() |
| + << "Actual: " << likelihood_description(output) << std::endl |
| + << "Expected: " << likelihood_description(reference) << std::endl; |
| + } |
| + return ::testing::AssertionSuccess(); |
| +} |
| + |
| +// Processes one frame (band split/merge above kSampleRate16kHz) and reads the detector outputs. |
| +void ProcessOneFrame(int sample_rate_hz, |
| + AudioBuffer* audio_buffer, |
| + VoiceDetectionImpl* voice_detection, |
| + int* frame_size_ms, |
| + bool* stream_has_voice, |
| + VoiceDetection::Likelihood* likelihood) { |
| + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| + audio_buffer->SplitIntoFrequencyBands(); |
| + } |
| + |
| + voice_detection->ProcessCaptureAudio(audio_buffer); |
| + |
| + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
|
hlundin-webrtc
2016/03/16 15:50:20
What is the rationale for merging the bands again?
peah-webrtc
2016/03/18 05:56:05
No point at all in this case, I will remove that.
|
| + audio_buffer->MergeFrequencyBands(); |
| + } |
| + |
| + *frame_size_ms = voice_detection->frame_size_ms(); |
| + *stream_has_voice = voice_detection->stream_has_voice(); |
| + *likelihood = voice_detection->likelihood(); |
| +} |
| + |
| +// Fills testvector with the next block from audio_loop, scaled by the signal-level gain and 1/32768. |
| +void ConstructTestVector(int samples_per_channel, |
| + int num_channels, |
| + test::AudioLoop* audio_loop, |
| + TestSignalLevels signal_level, |
| + std::vector<float>* testvector) { |
| + testvector->resize(samples_per_channel * num_channels); |
| + auto input_samples = audio_loop->GetNextBlock(); |
| + |
| + float signal_gain = 0.0f; |
|
hlundin-webrtc
2016/03/16 15:50:20
Consider:
float signal_gain = 0.1f; // TestSigna
peah-webrtc
2016/03/18 05:56:05
Thanks. This code is now removed.
|
| + switch (signal_level) { |
| + case TestSignalLevels::kLow: |
| + signal_gain = 0.1f; |
| + break; |
| + case TestSignalLevels::kMedium: |
| + signal_gain = 0.5f; |
| + break; |
| + case TestSignalLevels::kHigh: |
| + signal_gain = 1.0f; |
| + break; |
| + default: |
| + RTC_DCHECK(false); |
| + } |
| + |
| + for (int k = 0; k < samples_per_channel; ++k) { |
| + for (int j = 0; j < num_channels; ++j) { |
| + (*testvector)[k * num_channels + j] = |
| + signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f; |
| + } |
| + } |
| +} |
| + |
| +// Initializes and enables the voice detector for the given sample rate. |
| +void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) { |
| +  // Rates above 16000 Hz are capped at 16000 Hz before initialization. |
| +  int init_rate_hz = sample_rate_hz; |
| +  if (init_rate_hz > 16000) { |
| +    init_rate_hz = 16000; |
| +  } |
| +  voice_detection->Initialize(init_rate_hz); |
| +  voice_detection->Enable(true); |
| +} |
| + |
| +std::string GetTestVectorFileName(int sample_rate_hz) { |
|
hlundin-webrtc
2016/03/16 15:50:20
You can get rid of this function if you follow my
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + switch (sample_rate_hz) { |
| + case 8000: |
| + // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since |
| + // the test is only a bitexactness test. |
| + return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", |
| + "pcm"); |
| + case 16000: |
| + return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz", |
| + "pcm"); |
| + case 32000: |
| + return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", |
| + "pcm"); |
| + case 48000: |
| + // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since |
| + // the test is only a bitexactness test. |
| + return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", |
| + "pcm"); |
| + default: |
| + RTC_DCHECK(false); |
| + } |
| + |
| + return ""; |
| +} |
| + |
| +// Runs num_frames_to_process frames through the detector, then checks the |
| +// values reported after the last frame against the references. |
| +void RunBitexactnessTest(int sample_rate_hz, |
| + int num_channels, |
| + int num_frames_to_process, |
| + TestSignalLevels signal_level, |
| + int frame_size_ms_reference, |
| + bool stream_has_voice_reference, |
| + VoiceDetection::Likelihood likelihood_reference) { |
| + test::AudioLoop audio_loop; |
|
hlundin-webrtc
2016/03/16 15:50:20
I would actually recommend you use a ResampleInput
peah-webrtc
2016/03/18 05:56:05
Thanks for the suggestion!!! I found some Resource
|
| + int samples_per_channel = 80 * sample_rate_hz / 8000; |
| + const StreamConfig stream_config(sample_rate_hz, num_channels, false); |
| + AudioBuffer audio_buffer( |
| + stream_config.num_frames(), stream_config.num_channels(), |
| + stream_config.num_frames(), stream_config.num_channels(), |
| + stream_config.num_frames()); |
| + |
| + std::string filename; |
|
hlundin-webrtc
2016/03/16 15:50:20
Not used.
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + bool success = audio_loop.Init( |
| + GetTestVectorFileName(sample_rate_hz), |
| + num_frames_to_process * samples_per_channel * num_channels, |
| + samples_per_channel * num_channels); |
| + RTC_DCHECK(success); |
|
hlundin-webrtc
2016/03/16 15:50:20
This is test code; you might as well CHECK things
peah-webrtc
2016/03/18 05:56:05
Good point!!!
Removed this one but will change oth
|
| + |
| + rtc::CriticalSection crit; |
| + VoiceDetectionImpl voice_detection(&crit); |
| + SetupComponent(sample_rate_hz, &voice_detection); |
| + |
| + std::vector<float> frame_input; |
| + int frame_size_ms; |
| + bool stream_has_voice; |
| + VoiceDetection::Likelihood likelihood; |
| + for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { |
| + ConstructTestVector(samples_per_channel, num_channels, &audio_loop, |
| + signal_level, &frame_input); |
| + |
| + test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); |
| + |
| + ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection, |
| + &frame_size_ms, &stream_has_voice, &likelihood); |
| + } |
| + |
| + // Compare the last frame outputs to the references (unset if no frame was processed). |
| + EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms, |
|
hlundin-webrtc
2016/03/16 15:50:20
I think all of these should be changed to regular
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + frame_size_ms_reference); |
| + EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice, |
| + stream_has_voice_reference); |
| + EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood, |
| + likelihood_reference); |
| +} |
| + |
| +// Number of frames each test case below asks RunBitexactnessTest to process. |
| +const int kNumFramesToProcess = 1000; |
| + |
| +} // namespace |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
|
hlundin-webrtc
2016/03/16 15:50:20
kFrameSizeMsReference is always 10; define it once
peah-webrtc
2016/03/18 05:56:05
Done.
|
| +  const bool kStreamHasVoiceReference = true; |
|
hlundin-webrtc
2016/03/16 15:50:20
HAs -> Has
hlundin-webrtc
2016/03/16 15:50:20
Define once before the TESTs.
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:06
Done.
|
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
|
hlundin-webrtc
2016/03/16 15:50:20
... and define it before the TESTs.
hlundin-webrtc
2016/03/16 15:50:20
kLli -> kLi
peah-webrtc
2016/03/18 05:56:05
Done.
peah-webrtc
2016/03/18 05:56:05
Done.
|
| + |
| +  // 8 kHz, one channel, low signal level. |
| +  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, one channel, low signal level. |
| +  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 32 kHz, one channel, low signal level. |
| +  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 48 kHz, one channel, low signal level. |
| +  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, two channels, low signal level. |
| +  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 8 kHz, one channel, medium signal level. |
| +  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, one channel, medium signal level. |
| +  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 32 kHz, one channel, medium signal level. |
| +  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 48 kHz, one channel, medium signal level. |
| +  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, two channels, medium signal level. |
| +  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 8 kHz, one channel, high signal level. |
| +  RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, one channel, high signal level. |
| +  RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 32 kHz, one channel, high signal level. |
| +  RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 48 kHz, one channel, high signal level. |
| +  RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) { |
| +  // Reference outputs expected from the detector for this configuration. |
| +  const int kFrameSizeMsReference = 10; |
| +  const bool kStreamHasVoiceReference = true; |
| +  const auto kLikelihoodReference = VoiceDetection::kLowLikelihood; |
| + |
| +  // 16 kHz, two channels, high signal level. |
| +  RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh, |
| +                      kFrameSizeMsReference, kStreamHasVoiceReference, |
| +                      kLikelihoodReference); |
| +} |
| + |
| +} // namespace webrtc |