webrtc/modules/audio_processing/voice_detection_unittest.cc - Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module.

Side by Side Diff: webrtc/modules/audio_processing/voice_detection_unittest.cc

Issue 1804373002: Added a bitexactness test for the voice activity detector in the audio processing module. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@LevelEstimatorBitExactness_CL

Patch Set: Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/test/bitexactness_tools.cc ('K') | « webrtc/modules/audio_processing/test/bitexactness_tools.cc ('k') | webrtc/modules/modules.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
	hlundin-webrtc (2016/03/16 15:50:20): 2016
	peah-webrtc (2016/03/18 05:56:05): Done.
	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 */

	10 #include <vector>

	11

	12 #include "testing/gtest/include/gtest/gtest.h"

	13 #include "webrtc/base/array_view.h"

	14 #include "webrtc/base/random.h"

	15 #include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h"

	16 #include "webrtc/modules/audio_processing/audio_buffer.h"

	17 #include "webrtc/modules/audio_processing/voice_detection_impl.h"

	18 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"

	19 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h"

	20 #include "webrtc/test/testsupport/fileutils.h"

	21

	22 namespace webrtc {

	23 namespace {

	24

	25 enum TestSignalLevels { kLow, kMedium, kHigh };

	26

	27 ::testing::AssertionResult AssertLikelihoodsNotEqual(
	hlundin-webrtc (2016/03/16 15:50:20): This is a lot of code to compare two enum (integer) values. Use EXPECT_EQ instead.
	peah-webrtc (2016/03/18 05:56:05): Done.
	28 const char* m_expr,

	29 const char* n_expr,

	30 const VoiceDetection::Likelihood& output,

	31 const VoiceDetection::Likelihood& reference) {

	32 // If the values are deemed not to be similar, return a report of the

	33 // difference.

	34 if (output != reference) {

	35 // Lambda function that produces a string containing the likelihood name.

	36 auto likelihood_description = [](VoiceDetection::Likelihood likelihood) {

	37 switch (likelihood) {

	38 case VoiceDetection::kVeryLowLikelihood:

	39 return std::string("kVeryLowLikelihood");

	40 break;

	41 case VoiceDetection::kLowLikelihood:

	42 return std::string("kLowLikelihood");

	43 break;

	44 case VoiceDetection::kModerateLikelihood:

	45 return std::string("kModerateLikelihood");

	46 break;

	47 case VoiceDetection::kHighLikelihood:

	48 return std::string("kHighLikelihood");

	49 break;

	50 default:

	51 RTC_DCHECK(false);

	52 return std::string("");

	53 }

	54 };

	55

	56 return ::testing::AssertionFailure()

	57 << "Actual: " << likelihood_description(output) << std::endl

	58 << "Expected: " << likelihood_description(reference) << std::endl;

	59 }

	60 return ::testing::AssertionSuccess();

	61 }

	62

	63 // Process one frame of data and produce the output.

	64 void ProcessOneFrame(int sample_rate_hz,

	65 AudioBuffer* audio_buffer,

	66 VoiceDetectionImpl* voice_detection,

	67 int* frame_size_ms,

	68 bool* stream_has_voice,

	69 VoiceDetection::Likelihood* likelihood) {

	70 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {

	71 audio_buffer->SplitIntoFrequencyBands();

	72 }

	73

	74 voice_detection->ProcessCaptureAudio(audio_buffer);

	75

	76 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
	hlundin-webrtc (2016/03/16 15:50:20): What is the rationale for merging the bands again? Is this buffer used any more after the voice detection is done?
	peah-webrtc (2016/03/18 05:56:05): No point at all in this case, I will remove that.
	77 audio_buffer->MergeFrequencyBands();

	78 }

	79

	80 *frame_size_ms = voice_detection->frame_size_ms();

	81 *stream_has_voice = voice_detection->stream_has_voice();

	82 *likelihood = voice_detection->likelihood();

	83 }

	84

	85 // Forms a predefined random test vector.

	86 void ConstructTestVector(int samples_per_channel,

	87 int num_channels,

	88 test::AudioLoop* audio_loop,

	89 TestSignalLevels signal_level,

	90 std::vector<float>* testvector) {

	91 testvector->resize(samples_per_channel * num_channels);

	92 auto input_samples = audio_loop->GetNextBlock();

	93

	94 float signal_gain = 0.0f;
	hlundin-webrtc (2016/03/16 15:50:20): Consider:
	    float signal_gain = 0.1f;  // TestSignalLevels::kLow
	    if (signal_level == TestSignalLevels::kMedium)
	      signal_gain = 0.5f;
	    else if (signal_level == TestSignalLevels::kHigh)
	      signal_gain = 1.0f;
	  More compact, and if you make TestSignalLevels an enum class, you won't have to worry about other values.
	peah-webrtc (2016/03/18 05:56:05): Thanks. This code is now removed.
	95 switch (signal_level) {

	96 case TestSignalLevels::kLow:

	97 signal_gain = 0.1f;

	98 break;

	99 case TestSignalLevels::kMedium:

	100 signal_gain = 0.5f;

	101 break;

	102 case TestSignalLevels::kHigh:

	103 signal_gain = 1.0f;

	104 break;

	105 default:

	106 RTC_DCHECK(false);

	107 }

	108

	109 for (int k = 0; k < samples_per_channel; ++k) {

	110 for (int j = 0; j < num_channels; ++j) {

	111 (*testvector)[k * num_channels + j] =

	112 signal_gain * input_samples[j * samples_per_channel + k] / 32768.0f;

	113 }

	114 }

	115 }

	116

	117 void SetupComponent(int sample_rate_hz, VoiceDetectionImpl* voice_detection) {

	118 voice_detection->Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);

	119 voice_detection->Enable(true);

	120 }

	121

	122 std::string GetTestVectorFileName(int sample_rate_hz) {
	hlundin-webrtc (2016/03/16 15:50:20): You can get rid of this function if you follow my suggestion below.
	peah-webrtc (2016/03/18 05:56:05): Done.
	123 switch (sample_rate_hz) {

	124 case 8000:

	125 // Use the 16 kHz signal for the 8 kHz case as well. Acceptable since

	126 // the test is only a bitexactness test.

	127 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",

	128 "pcm");

	129 case 16000:

	130 return webrtc::test::ResourcePath("audio_coding/speech_mono_16kHz",

	131 "pcm");

	132 case 32000:

	133 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",

	134 "pcm");

	135 case 48000:

	136 // Use the 32 kHz signal for the 48 kHz case as well. Acceptable since

	137 // the test is only a bitexactness test.

	138 return webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz",

	139 "pcm");

	140 default:

	141 RTC_DCHECK(false);

	142 }

	143

	144 return "";

	145 }

	146

	147 // Processes a specified amount of frames, verifies the results and reports

	148 // any errors.

	149 void RunBitexactnessTest(int sample_rate_hz,

	150 int num_channels,

	151 int num_frames_to_process,

	152 TestSignalLevels signal_level,

	153 int frame_size_ms_reference,

	154 bool stream_has_voice_reference,

	155 VoiceDetection::Likelihood likelihood_reference) {

	156 test::AudioLoop audio_loop;
	hlundin-webrtc (2016/03/16 15:50:20): I would actually recommend you use a ResampleInputAudioFile instead. It is a specialization of InputAudioFile. The differences are that it reads directly from file instead of from memory, but it supports resampling so you can use a single input file for all rates. It also supports looping, just like AudioLoop. Bonus: it has a DuplicateInterleaved utility.
	peah-webrtc (2016/03/18 05:56:05): Thanks for the suggestion!!! I found some Resource files for 8-48 kHz for far-end and nearend so I will for now use those instead, using InputAudioFile. But if they prove not to be sufficiently good, I'll instead go for ResampleInputAudioFile.
	157 int samples_per_channel = 80 * sample_rate_hz / 8000;

	158 const StreamConfig stream_config(sample_rate_hz, num_channels, false);

	159 AudioBuffer audio_buffer(

	160 stream_config.num_frames(), stream_config.num_channels(),

	161 stream_config.num_frames(), stream_config.num_channels(),

	162 stream_config.num_frames());

	163

	164 std::string filename;
	hlundin-webrtc (2016/03/16 15:50:20): Not used.
	peah-webrtc (2016/03/18 05:56:05): Done.
	165 bool success = audio_loop.Init(

	166 GetTestVectorFileName(sample_rate_hz),

	167 num_frames_to_process * samples_per_channel * num_channels,

	168 samples_per_channel * num_channels);

	169 RTC_DCHECK(success);
	hlundin-webrtc (2016/03/16 15:50:20): This is test code; you might as well CHECK things like this, since continuing with the test without an input file makes no sense, release or debug alike.
	peah-webrtc (2016/03/18 05:56:05): Good point!!! Removed this one but will change other DCHECK to CHECK within the tests.
	170

	171 rtc::CriticalSection crit;

	172 VoiceDetectionImpl voice_detection(&crit);

	173 SetupComponent(sample_rate_hz, &voice_detection);

	174

	175 std::vector<float> frame_input;

	176 int frame_size_ms;

	177 bool stream_has_voice;

	178 VoiceDetection::Likelihood likelihood;

	179 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) {

	180 ConstructTestVector(samples_per_channel, num_channels, &audio_loop,

	181 signal_level, &frame_input);

	182

	183 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer);

	184

	185 ProcessOneFrame(sample_rate_hz, &audio_buffer, &voice_detection,

	186 &frame_size_ms, &stream_has_voice, &likelihood);

	187 }

	188

	189 // Compare the outputs to the references.

	190 EXPECT_PRED_FORMAT2(test::AssertIntegersNotEqual, frame_size_ms,
	hlundin-webrtc (2016/03/16 15:50:20): I think all of these should be changed to regular EXPECT_EQ.
	peah-webrtc (2016/03/18 05:56:05): Done.
	191 frame_size_ms_reference);

	192 EXPECT_PRED_FORMAT2(test::AssertBoolsNotEqual, stream_has_voice,

	193 stream_has_voice_reference);

	194 EXPECT_PRED_FORMAT2(AssertLikelihoodsNotEqual, likelihood,

	195 likelihood_reference);

	196 }

	197

	198 const int kNumFramesToProcess = 1000;

	199

	200 } // namespace

	201

	202 TEST(VoiceDetectionBitExactnessTest, Mono8kHzLow) {

	203 const int kFrameSizeMsReference = 10;
	hlundin-webrtc (2016/03/16 15:50:20): kFrameSizeMsReference is always 10; define it once before the TESTs.
	peah-webrtc (2016/03/18 05:56:05): Done.
	204 const bool kStreamHAsVoiceReference = true;
	hlundin-webrtc (2016/03/16 15:50:20): HAs -> Has
	hlundin-webrtc (2016/03/16 15:50:20): Define once before the TESTs.
	peah-webrtc (2016/03/18 05:56:05), replying to "Define once before the TESTs.": Done.
	peah-webrtc (2016/03/18 05:56:06), replying to "HAs -> Has": Done.
	205 const VoiceDetection::Likelihood kLlikelihoodReference =
	hlundin-webrtc (2016/03/16 15:50:20): ... and define it before the TESTs.
	hlundin-webrtc (2016/03/16 15:50:20): kLli -> kLi
	peah-webrtc (2016/03/18 05:56:05), replying to "... and define it before the TESTs.": Done.
	peah-webrtc (2016/03/18 05:56:05), replying to "... and define it before the TESTs.": Done.
	206 VoiceDetection::kLowLikelihood;

	207

	208 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

	209 kFrameSizeMsReference, kStreamHAsVoiceReference,

	210 kLlikelihoodReference);

	211 }

	212

	213 TEST(VoiceDetectionBitExactnessTest, Mono16kHzLow) {

	214 const int kFrameSizeMsReference = 10;

	215 const bool kStreamHAsVoiceReference = true;

	216 const VoiceDetection::Likelihood kLlikelihoodReference =

	217 VoiceDetection::kLowLikelihood;

	218

	219 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

	220 kFrameSizeMsReference, kStreamHAsVoiceReference,

	221 kLlikelihoodReference);

	222 }

	223

	224 TEST(VoiceDetectionBitExactnessTest, Mono32kHzLow) {

	225 const int kFrameSizeMsReference = 10;

	226 const bool kStreamHAsVoiceReference = true;

	227 const VoiceDetection::Likelihood kLlikelihoodReference =

	228 VoiceDetection::kLowLikelihood;

	229

	230 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

	231 kFrameSizeMsReference, kStreamHAsVoiceReference,

	232 kLlikelihoodReference);

	233 }

	234

	235 TEST(VoiceDetectionBitExactnessTest, Mono48kHzLow) {

	236 const int kFrameSizeMsReference = 10;

	237 const bool kStreamHAsVoiceReference = true;

	238 const VoiceDetection::Likelihood kLlikelihoodReference =

	239 VoiceDetection::kLowLikelihood;

	240

	241 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kLow,

	242 kFrameSizeMsReference, kStreamHAsVoiceReference,

	243 kLlikelihoodReference);

	244 }

	245

	246 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzLow) {

	247 const int kFrameSizeMsReference = 10;

	248 const bool kStreamHAsVoiceReference = true;

	249 const VoiceDetection::Likelihood kLlikelihoodReference =

	250 VoiceDetection::kLowLikelihood;

	251

	252 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kLow,

	253 kFrameSizeMsReference, kStreamHAsVoiceReference,

	254 kLlikelihoodReference);

	255 }

	256

	257 TEST(VoiceDetectionBitExactnessTest, Mono8kHzMedium) {

	258 const int kFrameSizeMsReference = 10;

	259 const bool kStreamHAsVoiceReference = true;

	260 const VoiceDetection::Likelihood kLlikelihoodReference =

	261 VoiceDetection::kLowLikelihood;

	262

	263 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

	264 kFrameSizeMsReference, kStreamHAsVoiceReference,

	265 kLlikelihoodReference);

	266 }

	267

	268 TEST(VoiceDetectionBitExactnessTest, Mono16kHzMedium) {

	269 const int kFrameSizeMsReference = 10;

	270 const bool kStreamHAsVoiceReference = true;

	271 const VoiceDetection::Likelihood kLlikelihoodReference =

	272 VoiceDetection::kLowLikelihood;

	273

	274 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

	275 kFrameSizeMsReference, kStreamHAsVoiceReference,

	276 kLlikelihoodReference);

	277 }

	278

	279 TEST(VoiceDetectionBitExactnessTest, Mono32kHzMedium) {

	280 const int kFrameSizeMsReference = 10;

	281 const bool kStreamHAsVoiceReference = true;

	282 const VoiceDetection::Likelihood kLlikelihoodReference =

	283 VoiceDetection::kLowLikelihood;

	284

	285 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

	286 kFrameSizeMsReference, kStreamHAsVoiceReference,

	287 kLlikelihoodReference);

	288 }

	289

	290 TEST(VoiceDetectionBitExactnessTest, Mono48kHzMedium) {

	291 const int kFrameSizeMsReference = 10;

	292 const bool kStreamHAsVoiceReference = true;

	293 const VoiceDetection::Likelihood kLlikelihoodReference =

	294 VoiceDetection::kLowLikelihood;

	295

	296 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kMedium,

	297 kFrameSizeMsReference, kStreamHAsVoiceReference,

	298 kLlikelihoodReference);

	299 }

	300

	301 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzMedium) {

	302 const int kFrameSizeMsReference = 10;

	303 const bool kStreamHAsVoiceReference = true;

	304 const VoiceDetection::Likelihood kLlikelihoodReference =

	305 VoiceDetection::kLowLikelihood;

	306

	307 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kMedium,

	308 kFrameSizeMsReference, kStreamHAsVoiceReference,

	309 kLlikelihoodReference);

	310 }

	311

	312 TEST(VoiceDetectionBitExactnessTest, Mono8kHzHigh) {

	313 const int kFrameSizeMsReference = 10;

	314 const bool kStreamHAsVoiceReference = true;

	315 const VoiceDetection::Likelihood kLlikelihoodReference =

	316 VoiceDetection::kLowLikelihood;

	317

	318 RunBitexactnessTest(8000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

	319 kFrameSizeMsReference, kStreamHAsVoiceReference,

	320 kLlikelihoodReference);

	321 }

	322

	323 TEST(VoiceDetectionBitExactnessTest, Mono16kHzHigh) {

	324 const int kFrameSizeMsReference = 10;

	325 const bool kStreamHAsVoiceReference = true;

	326 const VoiceDetection::Likelihood kLlikelihoodReference =

	327 VoiceDetection::kLowLikelihood;

	328

	329 RunBitexactnessTest(16000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

	330 kFrameSizeMsReference, kStreamHAsVoiceReference,

	331 kLlikelihoodReference);

	332 }

	333

	334 TEST(VoiceDetectionBitExactnessTest, Mono32kHzHigh) {

	335 const int kFrameSizeMsReference = 10;

	336 const bool kStreamHAsVoiceReference = true;

	337 const VoiceDetection::Likelihood kLlikelihoodReference =

	338 VoiceDetection::kLowLikelihood;

	339

	340 RunBitexactnessTest(32000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

	341 kFrameSizeMsReference, kStreamHAsVoiceReference,

	342 kLlikelihoodReference);

	343 }

	344

	345 TEST(VoiceDetectionBitExactnessTest, Mono48kHzHigh) {

	346 const int kFrameSizeMsReference = 10;

	347 const bool kStreamHAsVoiceReference = true;

	348 const VoiceDetection::Likelihood kLlikelihoodReference =

	349 VoiceDetection::kLowLikelihood;

	350

	351 RunBitexactnessTest(48000, 1, kNumFramesToProcess, TestSignalLevels::kHigh,

	352 kFrameSizeMsReference, kStreamHAsVoiceReference,

	353 kLlikelihoodReference);

	354 }

	355

	356 TEST(VoiceDetectionBitExactnessTest, Stereo16kHzHigh) {

	357 const int kFrameSizeMsReference = 10;

	358 const bool kStreamHAsVoiceReference = true;

	359 const VoiceDetection::Likelihood kLlikelihoodReference =

	360 VoiceDetection::kLowLikelihood;

	361

	362 RunBitexactnessTest(16000, 2, kNumFramesToProcess, TestSignalLevels::kHigh,

	363 kFrameSizeMsReference, kStreamHAsVoiceReference,

	364 kLlikelihoodReference);

	365 }

	366

	367 } // namespace webrtc

OLD	NEW