webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc - Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)

Patch Set: rebase Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn ('k') | webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

	11 // This file consists of unit tests for webrtc::test::conversational_speech

	12 // members. Part of the tests focus on accepting or rejecting different

	13 // conversational speech setups. A setup is defined by a set of audio tracks and

	14 // timing information.

	15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest,

	16 // MultiEndCallSetup*) function looks like the drawing below and indicates which

	17 // setup is tested.

	18 //

	19 // Accept:

	20 // A 0****.....

	21 // B .....1****

	22 //

	23 // The drawing indicates the following:

	24 // - the illustrated setup should be accepted,

	25 // - there are two speakers (namely, A and B),

	26 // - A speaks first, B speaks second,

	27 // - each character after the speaker's letter indicates a time unit (e.g., 100

	28 // ms),

	29 // - "*" indicates speaking, "." listening,

	30 // - numbers indicate the turn index in std::vector<Turn>.

	31 //

	32 // Note that the same speaker can appear in multiple lines in order to depict

	33 // cases in which there are wrong offsets leading to self cross-talk (which is

	34 // rejected).

	35

11 #include <stdio.h>	36 #include <stdio.h>

	37 #include <map>

12 #include <memory>	38 #include <memory>

13	39

	40 #include "webrtc/base/logging.h"

14 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"	41 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"

15 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"	42 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"

16 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	43 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

17 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"	44 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"

18 #include "webrtc/test/gmock.h"	45 #include "webrtc/test/gmock.h"

19 #include "webrtc/test/gtest.h"	46 #include "webrtc/test/gtest.h"

20 #include "webrtc/test/testsupport/fileutils.h"	47 #include "webrtc/test/testsupport/fileutils.h"

21	48

22 namespace webrtc {	49 namespace webrtc {

23 namespace test {	50 namespace test {

(...skipping 13 matching lines...) Expand all Loading...
37 const std::vector<Turn> expected_timing = {	64 const std::vector<Turn> expected_timing = {

38 {"A", "a1", 0},	65 {"A", "a1", 0},

39 {"B", "b1", 0},	66 {"B", "b1", 0},

40 {"A", "a2", 100},	67 {"A", "a2", 100},

41 {"B", "b2", -200},	68 {"B", "b2", -200},

42 {"A", "a3", 0},	69 {"A", "a3", 0},

43 {"A", "a3", 0},	70 {"A", "a3", 0},

44 };	71 };

45 const std::size_t kNumberOfTurns = expected_timing.size();	72 const std::size_t kNumberOfTurns = expected_timing.size();

46	73

	74 // Fake audio track parameters.

	75 const MockWavReaderFactory::Params kMockWavReaderFactoryParams300ms =

	76 {48000, 1u, 14400u}; // 48kHz sample rate, mono, 0.3 seconds.

	77 const MockWavReaderFactory::Params kMockWavReaderFactoryParams500ms =

	78 {48000, 1u, 24000u}; // 48kHz sample rate, mono, 0.5 seconds.

	79 const MockWavReaderFactory::Params kMockWavReaderFactoryParams1000ms =

	80 {48000, 1u, 48000u}; // 48kHz sample rate, mono, 1 second.

	81

	82 // Default arguments for MockWavReaderFactory ctor.

	83 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =

	84 kMockWavReaderFactoryParams500ms;

	85 const std::map<std::string, const MockWavReaderFactory::Params>

	86 kDefaultMockWavReaderFactoryParamsMap = {

	87 {"t300", kMockWavReaderFactoryParams300ms},

	88 {"t500", kMockWavReaderFactoryParams500ms},

	89 {"t1000", kMockWavReaderFactoryParams1000ms},
	AleBzk 2017/03/28 13:11:10 t300, t500 and t1000 will be used as fake audio track file names to let the factory automatically look up kMockWavReaderFactoryParams300ms, kMockWavReaderFactoryParams500ms and kMockWavReaderFactoryParams1000ms respectively.
	90 };

	91

	92 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {

	93 return std::unique_ptr<MockWavReaderFactory>(

	94 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

	95 kDefaultMockWavReaderFactoryParamsMap));

	96 }

	97

47 } // namespace	98 } // namespace

48	99

49 TEST(ConversationalSpeechTest, Settings) {	100 class ConversationalSpeechTest : public testing::Test {

	101 public:

	102 ConversationalSpeechTest() {

	103 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

	104 }

	105 };

	106

	107 TEST_F(ConversationalSpeechTest, Settings) {

50 const conversational_speech::Config config(	108 const conversational_speech::Config config(

51 audiotracks_path, timing_filepath, output_path);	109 audiotracks_path, timing_filepath, output_path);

52	110

53 // Test getters.	111 // Test getters.

54 EXPECT_EQ(audiotracks_path, config.audiotracks_path());	112 EXPECT_EQ(audiotracks_path, config.audiotracks_path());

55 EXPECT_EQ(timing_filepath, config.timing_filepath());	113 EXPECT_EQ(timing_filepath, config.timing_filepath());

56 EXPECT_EQ(output_path, config.output_path());	114 EXPECT_EQ(output_path, config.output_path());

57 }	115 }

58	116

59 TEST(ConversationalSpeechTest, TimingSaveLoad) {	117 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {

60 // Save test timing.	118 // Save test timing.

61 const std::string temporary_filepath = webrtc::test::TempFilename(	119 const std::string temporary_filepath = webrtc::test::TempFilename(

62 webrtc::test::OutputPath(), "TempTimingTestFile");	120 webrtc::test::OutputPath(), "TempTimingTestFile");

63 SaveTiming(temporary_filepath, expected_timing);	121 SaveTiming(temporary_filepath, expected_timing);

64	122

65 // Create a std::vector<Turn> instance by loading from file.	123 // Create a std::vector<Turn> instance by loading from file.

66 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);	124 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);

67 std::remove(temporary_filepath.c_str());	125 std::remove(temporary_filepath.c_str());

68	126

69 // Check size.	127 // Check size.

70 EXPECT_EQ(expected_timing.size(), actual_timing.size());	128 EXPECT_EQ(expected_timing.size(), actual_timing.size());

71	129

72 // Check Turn instances.	130 // Check Turn instances.

73 for (size_t index = 0; index < expected_timing.size(); ++index) {	131 for (size_t index = 0; index < expected_timing.size(); ++index) {

74 EXPECT_EQ(expected_timing[index], actual_timing[index])	132 EXPECT_EQ(expected_timing[index], actual_timing[index])

75 << "turn #" << index << " not matching";	133 << "turn #" << index << " not matching";

76 }	134 }

77 }	135 }

78	136

79 TEST(ConversationalSpeechTest, MultiEndCallCreate) {	137 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {

80 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(	138 auto mock_wavreader_factory = CreateMockWavReaderFactory();

81 new MockWavReaderFactory());

82	139

83 // There are 5 unique audio tracks to read.	140 // There are 5 unique audio tracks to read.

84 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5);	141 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5);

85	142

86 // Inject the mock wav reader factory.	143 // Inject the mock wav reader factory.

87 conversational_speech::MultiEndCall multiend_call(	144 conversational_speech::MultiEndCall multiend_call(

88 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));	145 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));

	146 EXPECT_TRUE(multiend_call.valid());

89	147

90 // Test.	148 // Test.

91 EXPECT_EQ(2u, multiend_call.speaker_names().size());	149 EXPECT_EQ(2u, multiend_call.speaker_names().size());

92 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());	150 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());

93 }	151 }

94	152

	153 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) {

	154 const std::vector<Turn> timing = {

	155 {"A", "t500", -100},

	156 {"B", "t500", 0},

	157 };

	158 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	159

	160 // There is one unique audio track to read.

	161 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	162

	163 conversational_speech::MultiEndCall multiend_call(

	164 timing, audiotracks_path, std::move(mock_wavreader_factory));

	165 EXPECT_FALSE(multiend_call.valid());

	166 }

	167

	168

	169 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {

	170 // Accept:

	171 // A 0****.....

	172 // B .....1****

	173 const std::vector<Turn> timing = {

	174 {"A", "t500", 0},

	175 {"B", "t500", 0},

	176 };

	177 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	178

	179 // There is one unique audio track to read.

	180 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	181

	182 conversational_speech::MultiEndCall multiend_call(

	183 timing, audiotracks_path, std::move(mock_wavreader_factory));

	184 EXPECT_TRUE(multiend_call.valid());

	185 }

	186

	187 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {

	188 // Accept:

	189 // A 0****.......

	190 // B .......1****

	191 const std::vector<Turn> timing = {

	192 {"A", "t500", 0},

	193 {"B", "t500", 200},

	194 };

	195 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	196

	197 // There is one unique audio track to read.

	198 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	199

	200 conversational_speech::MultiEndCall multiend_call(

	201 timing, audiotracks_path, std::move(mock_wavreader_factory));

	202 EXPECT_TRUE(multiend_call.valid());

	203 }

	204

	205 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {

	206 // Accept:

	207 // A 0****...

	208 // B ...1****

	209 const std::vector<Turn> timing = {

	210 {"A", "t500", 0},

	211 {"B", "t500", -100},

	212 };

	213 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	214

	215 // There is one unique audio track to read.

	216 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	217

	218 conversational_speech::MultiEndCall multiend_call(

	219 timing, audiotracks_path, std::move(mock_wavreader_factory));

	220 EXPECT_TRUE(multiend_call.valid());

	221 }

	222

	223 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {

	224 // Reject:

	225 // A ..0****

	226 // B .1****. The n-th turn cannot start before the (n-1)-th one.

	227 const std::vector<Turn> timing = {

	228 {"A", "t500", 200},

	229 {"B", "t500", -600},

	230 };

	231 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	232

	233 // There is one unique audio track to read.

	234 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	235

	236 conversational_speech::MultiEndCall multiend_call(

	237 timing, audiotracks_path, std::move(mock_wavreader_factory));

	238 EXPECT_FALSE(multiend_call.valid());

	239 }

	240

	241 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {

	242 // Accept:

	243 // A 0*2**...

	244 // B ..1*********

	245 const std::vector<Turn> timing = {

	246 {"A", "t500", 0},

	247 {"B", "t1000", -200},

	248 {"A", "t500", -800},

	249 };

	250 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	251

	252 // There are two unique audio tracks to read.

	253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	254

	255 conversational_speech::MultiEndCall multiend_call(

	256 timing, audiotracks_path, std::move(mock_wavreader_factory));

	257 EXPECT_TRUE(multiend_call.valid());

	258 }

	259

	260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {

	261 // Reject:

	262 // A 0****......

	263 // A ...1****...

	264 // B ......2****

	265 // ^ Turn #1 overlaps with #0 which is from the same speaker.

	266 const std::vector<Turn> timing = {

	267 {"A", "t500", 0},

	268 {"A", "t500", -200},

	269 {"B", "t500", -200},

	270 };

	271 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	272

	273 // There is one unique audio track to read.

	274 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	275

	276 conversational_speech::MultiEndCall multiend_call(

	277 timing, audiotracks_path, std::move(mock_wavreader_factory));

	278 EXPECT_FALSE(multiend_call.valid());

	279 }

	280

	281 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {

	282 // Reject:

	283 // A 0*********

	284 // B 1**.......

	285 // C ...2**....

	286 // A ......3**.

	287 // ^ Turn #3 overlaps with #0 which is from the same speaker.

	288 const std::vector<Turn> timing = {

	289 {"A", "t1000", 0},

	290 {"B", "t300", -1000},

	291 {"C", "t300", 0},

	292 {"A", "t300", 0},

	293 };

	294 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	295

	296 // There are two unique audio tracks to read.

	297 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	298

	299 conversational_speech::MultiEndCall multiend_call(

	300 timing, audiotracks_path, std::move(mock_wavreader_factory));

	301 EXPECT_FALSE(multiend_call.valid());

	302 }

	303

	304 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {

	305 // Accept:

	306 // A 0*********..

	307 // B ..1****.....

	308 // C .......2****

	309 const std::vector<Turn> timing = {

	310 {"A", "t1000", 0},

	311 {"B", "t500", -800},

	312 {"C", "t500", 0},

	313 };

	314 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	315

	316 // There are two unique audio tracks to read.

	317 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	318

	319 conversational_speech::MultiEndCall multiend_call(

	320 timing, audiotracks_path, std::move(mock_wavreader_factory));

	321 EXPECT_TRUE(multiend_call.valid());

	322 }

	323

	324 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {

	325 // Reject:

	326 // A 0*********

	327 // B ..1****...

	328 // C ....2****.

	329 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers

	330 // not permitted).

	331 const std::vector<Turn> timing = {

	332 {"A", "t1000", 0},

	333 {"B", "t500", -800},

	334 {"C", "t500", -300},

	335 };

	336 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	337

	338 // There are two unique audio tracks to read.

	339 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	340

	341 conversational_speech::MultiEndCall multiend_call(

	342 timing, audiotracks_path, std::move(mock_wavreader_factory));

	343 EXPECT_FALSE(multiend_call.valid());

	344 }

	345

	346 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {

	347 // Accept:

	348 // A 0*********..

	349 // B .1****......

	350 // C .......2****

	351 const std::vector<Turn> timing = {

	352 {"A", "t1000", 0},

	353 {"B", "t500", -900},

	354 {"C", "t500", 100},

	355 };

	356 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	357

	358 // There are two unique audio tracks to read.

	359 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	360

	361 conversational_speech::MultiEndCall multiend_call(

	362 timing, audiotracks_path, std::move(mock_wavreader_factory));

	363 EXPECT_TRUE(multiend_call.valid());

	364 }

	365

	366 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {

	367 // Accept:

	368 // A 0****

	369 // B 1****

	370 const std::vector<Turn> timing = {

	371 {"A", "t500", 0},

	372 {"B", "t500", -500},

	373 };

	374 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	375

	376 // There is one unique audio track to read.

	377 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	378

	379 conversational_speech::MultiEndCall multiend_call(

	380 timing, audiotracks_path, std::move(mock_wavreader_factory));

	381 EXPECT_TRUE(multiend_call.valid());

	382 }

	383

	384 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {

	385 // Accept:

	386 // A 0**....3.5...

	387 // B .....1**...4.....

	388 // C ......2.......6..

	389 const std::vector<Turn> timing = {

	390 {"A", "t500", 0},

	391 {"B", "t500", 0},

	392 {"C", "t300", -400},

	393 {"A", "t500", 0},

	394 {"B", "t300", -100},

	395 {"A", "t300", -100},

	396 {"C", "t300", -200},

	397 };

	398 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(

	399 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

	400 kDefaultMockWavReaderFactoryParamsMap));

	401

	402 // There are two unique audio tracks to read.

	403 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	404

	405 conversational_speech::MultiEndCall multiend_call(

	406 timing, audiotracks_path, std::move(mock_wavreader_factory));

	407 EXPECT_TRUE(multiend_call.valid());

	408 }

	409

	410 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {

	411 // Reject:

	412 // A 0**....3.5

	413 // B .....1**...4..

	414 // C ......2.....6..

	415 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+

	416 // speakers not permitted).

	417 const std::vector<Turn> timing = {

	418 {"A", "t500", 0},

	419 {"B", "t500", 0},

	420 {"C", "t300", -400},

	421 {"A", "t500", 0},

	422 {"B", "t300", -100},

	423 {"A", "t300", -200},

	424 {"C", "t300", -200},

	425 };

	426 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(

	427 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

	428 kDefaultMockWavReaderFactoryParamsMap));

	429

	430 // There are two unique audio tracks to read.

	431 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	432

	433 conversational_speech::MultiEndCall multiend_call(

	434 timing, audiotracks_path, std::move(mock_wavreader_factory));

	435 EXPECT_FALSE(multiend_call.valid());

	436 }

	437

95 } // namespace test	438 } // namespace test

96 } // namespace webrtc	439 } // namespace webrtc

OLD	NEW