Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(127)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: missing include to get std::back_inserter working on win targets Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 // This file consists of unit tests for webrtc::test::conversational_speech
12 // members. Some of them focus on accepting or rejecting different
13 // conversational speech setups. A setup is defined by a set of audio tracks and
14 // timing information.
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest,
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which
17 // setup is tested.
18 //
19 // Accept:
20 // A 0****.....
21 // B .....1****
22 //
23 // The drawing indicates the following:
24 // - the illustrated setup should be accepted,
25 // - there are two speakers (namely, A and B),
26 // - A is the first speaker, B is the second one,
27 // - each character after the speaker's letter indicates a time unit (e.g., 100
28 // ms),
29 // - "*" indicates speaking, "." listening,
30 // - numbers indicate the turn index in std::vector<Turn>.
31 //
32 // Note that the same speaker can appear in multiple lines in order to depict
33 // cases in which there are wrong offsets leading to self cross-talk (which is
34 // rejected).
35
11 #include <stdio.h> 36 #include <stdio.h>
37 #include <map>
12 #include <memory> 38 #include <memory>
13 39
40 #include "webrtc/base/logging.h"
14 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" 41 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
15 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" 42 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
16 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 43 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
17 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" 44 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
18 #include "webrtc/test/gmock.h" 45 #include "webrtc/test/gmock.h"
19 #include "webrtc/test/gtest.h" 46 #include "webrtc/test/gtest.h"
20 #include "webrtc/test/testsupport/fileutils.h" 47 #include "webrtc/test/testsupport/fileutils.h"
21 48
22 namespace webrtc { 49 namespace webrtc {
23 namespace test { 50 namespace test {
(...skipping 13 matching lines...) Expand all
37 const std::vector<Turn> expected_timing = { 64 const std::vector<Turn> expected_timing = {
38 {"A", "a1", 0}, 65 {"A", "a1", 0},
39 {"B", "b1", 0}, 66 {"B", "b1", 0},
40 {"A", "a2", 100}, 67 {"A", "a2", 100},
41 {"B", "b2", -200}, 68 {"B", "b2", -200},
42 {"A", "a3", 0}, 69 {"A", "a3", 0},
43 {"A", "a3", 0}, 70 {"A", "a3", 0},
44 }; 71 };
45 const std::size_t kNumberOfTurns = expected_timing.size(); 72 const std::size_t kNumberOfTurns = expected_timing.size();
46 73
74 // Default arguments for MockWavReaderFactory ctor.
75 // Fake audio track parameters.
76 constexpr int kDefaultSampleRate = 48000;
77 const std::map<std::string, const MockWavReaderFactory::Params>
78 kDefaultMockWavReaderFactoryParamsMap = {
79 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds.
80 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds.
81 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds.
82 };
83 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
84 kDefaultMockWavReaderFactoryParamsMap.at("t500");
85
86 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
87 return std::unique_ptr<MockWavReaderFactory>(
88 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
89 kDefaultMockWavReaderFactoryParamsMap));
90 }
91
47 } // namespace 92 } // namespace
48 93
49 TEST(ConversationalSpeechTest, Settings) { 94 using testing::_;
95
96 // TODO(alessiob): Remove fixture once conversational_speech is fully
97 // implemented and replace TEST_F with TEST.
98 class ConversationalSpeechTest : public testing::Test {
99 public:
100 ConversationalSpeechTest() {
101 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
102 }
103 };
104
105 TEST_F(ConversationalSpeechTest, Settings) {
50 const conversational_speech::Config config( 106 const conversational_speech::Config config(
51 audiotracks_path, timing_filepath, output_path); 107 audiotracks_path, timing_filepath, output_path);
52 108
53 // Test getters. 109 // Test getters.
54 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); 110 EXPECT_EQ(audiotracks_path, config.audiotracks_path());
55 EXPECT_EQ(timing_filepath, config.timing_filepath()); 111 EXPECT_EQ(timing_filepath, config.timing_filepath());
56 EXPECT_EQ(output_path, config.output_path()); 112 EXPECT_EQ(output_path, config.output_path());
57 } 113 }
58 114
59 TEST(ConversationalSpeechTest, TimingSaveLoad) { 115 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
60 // Save test timing. 116 // Save test timing.
61 const std::string temporary_filepath = webrtc::test::TempFilename( 117 const std::string temporary_filepath = webrtc::test::TempFilename(
62 webrtc::test::OutputPath(), "TempTimingTestFile"); 118 webrtc::test::OutputPath(), "TempTimingTestFile");
63 SaveTiming(temporary_filepath, expected_timing); 119 SaveTiming(temporary_filepath, expected_timing);
64 120
65 // Create a std::vector<Turn> instance by loading from file. 121 // Create a std::vector<Turn> instance by loading from file.
66 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); 122 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
67 std::remove(temporary_filepath.c_str()); 123 std::remove(temporary_filepath.c_str());
68 124
69 // Check size. 125 // Check size.
70 EXPECT_EQ(expected_timing.size(), actual_timing.size()); 126 EXPECT_EQ(expected_timing.size(), actual_timing.size());
71 127
72 // Check Turn instances. 128 // Check Turn instances.
73 for (size_t index = 0; index < expected_timing.size(); ++index) { 129 for (size_t index = 0; index < expected_timing.size(); ++index) {
74 EXPECT_EQ(expected_timing[index], actual_timing[index]) 130 EXPECT_EQ(expected_timing[index], actual_timing[index])
75 << "turn #" << index << " not matching"; 131 << "turn #" << index << " not matching";
76 } 132 }
77 } 133 }
78 134
79 TEST(ConversationalSpeechTest, MultiEndCallCreate) { 135 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
136 auto mock_wavreader_factory = CreateMockWavReaderFactory();
137
138 // There are 5 unique audio tracks to read.
139 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
140
141 // Inject the mock wav reader factory.
142 conversational_speech::MultiEndCall multiend_call(
143 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
144 EXPECT_TRUE(multiend_call.valid());
145
146 // Test.
147 EXPECT_EQ(2u, multiend_call.speaker_names().size());
148 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
149 EXPECT_EQ(6u, multiend_call.speaking_turns().size());
150 }
151
152 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
153 const std::vector<Turn> timing = {
154 {"A", "t500", -100},
155 {"B", "t500", 0},
156 };
157 auto mock_wavreader_factory = CreateMockWavReaderFactory();
158
159 // There is one unique audio track to read.
160 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
161
162 conversational_speech::MultiEndCall multiend_call(
163 timing, audiotracks_path, std::move(mock_wavreader_factory));
164 EXPECT_FALSE(multiend_call.valid());
165 }
166
167 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
168 // Accept:
169 // A 0****.....
170 // B .....1****
171 constexpr std::size_t expected_duration = kDefaultSampleRate;
172 const std::vector<Turn> timing = {
173 {"A", "t500", 0},
174 {"B", "t500", 0},
175 };
176 auto mock_wavreader_factory = CreateMockWavReaderFactory();
177
178 // There is one unique audio track to read.
179 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
180
181 conversational_speech::MultiEndCall multiend_call(
182 timing, audiotracks_path, std::move(mock_wavreader_factory));
183 EXPECT_TRUE(multiend_call.valid());
184
185 // Test.
186 EXPECT_EQ(2u, multiend_call.speaker_names().size());
187 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
188 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
189 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
190 }
191
192 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
193 // Accept:
194 // A 0****.......
195 // B .......1****
196 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
197 const std::vector<Turn> timing = {
198 {"A", "t500", 0},
199 {"B", "t500", 200},
200 };
201 auto mock_wavreader_factory = CreateMockWavReaderFactory();
202
203 // There is one unique audio track to read.
204 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
205
206 conversational_speech::MultiEndCall multiend_call(
207 timing, audiotracks_path, std::move(mock_wavreader_factory));
208 EXPECT_TRUE(multiend_call.valid());
209
210 // Test.
211 EXPECT_EQ(2u, multiend_call.speaker_names().size());
212 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
213 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
214 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
215 }
216
217 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
218 // Accept:
219 // A 0****....
220 // B ....1****
221 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
222 const std::vector<Turn> timing = {
223 {"A", "t500", 0},
224 {"B", "t500", -100},
225 };
226 auto mock_wavreader_factory = CreateMockWavReaderFactory();
227
228 // There is one unique audio track to read.
229 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
230
231 conversational_speech::MultiEndCall multiend_call(
232 timing, audiotracks_path, std::move(mock_wavreader_factory));
233 EXPECT_TRUE(multiend_call.valid());
234
235 // Test.
236 EXPECT_EQ(2u, multiend_call.speaker_names().size());
237 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
238 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
239 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
240 }
241
242 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
243 // Reject:
244 // A ..0****
245 // B .1****. The n-th turn cannot start before the (n-1)-th one.
246 const std::vector<Turn> timing = {
247 {"A", "t500", 200},
248 {"B", "t500", -600},
249 };
250 auto mock_wavreader_factory = CreateMockWavReaderFactory();
251
252 // There is one unique audio track to read.
253 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
254
255 conversational_speech::MultiEndCall multiend_call(
256 timing, audiotracks_path, std::move(mock_wavreader_factory));
257 EXPECT_FALSE(multiend_call.valid());
258 }
259
260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
261 // Accept:
262 // A 0****2****...
263 // B ...1*********
264 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
265 const std::vector<Turn> timing = {
266 {"A", "t500", 0},
267 {"B", "t1000", -200},
268 {"A", "t500", -800},
269 };
270 auto mock_wavreader_factory = CreateMockWavReaderFactory();
271
272 // There are two unique audio tracks to read.
273 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
274
275 conversational_speech::MultiEndCall multiend_call(
276 timing, audiotracks_path, std::move(mock_wavreader_factory));
277 EXPECT_TRUE(multiend_call.valid());
278
279 // Test.
280 EXPECT_EQ(2u, multiend_call.speaker_names().size());
281 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
282 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
283 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
284 }
285
286 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
287 // Reject:
288 // A 0****......
289 // A ...1****...
290 // B ......2****
291 // ^ Turn #1 overlaps with #0 which is from the same speaker.
292 const std::vector<Turn> timing = {
293 {"A", "t500", 0},
294 {"A", "t500", -200},
295 {"B", "t500", -200},
296 };
297 auto mock_wavreader_factory = CreateMockWavReaderFactory();
298
299 // There is one unique audio track to read.
300 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
301
302 conversational_speech::MultiEndCall multiend_call(
303 timing, audiotracks_path, std::move(mock_wavreader_factory));
304 EXPECT_FALSE(multiend_call.valid());
305 }
306
307 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
308 // Reject:
309 // A 0*********
310 // B 1**.......
311 // C ...2**....
312 // A ......3**.
313 // ^ Turn #3 overlaps with #0 which is from the same speaker.
314 const std::vector<Turn> timing = {
315 {"A", "t1000", 0},
316 {"B", "t300", -1000},
317 {"C", "t300", 0},
318 {"A", "t300", 0},
319 };
320 auto mock_wavreader_factory = CreateMockWavReaderFactory();
321
322 // There are two unique audio tracks to read.
323 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
324
325 conversational_speech::MultiEndCall multiend_call(
326 timing, audiotracks_path, std::move(mock_wavreader_factory));
327 EXPECT_FALSE(multiend_call.valid());
328 }
329
330 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
331 // Accept:
332 // A 0*********..
333 // B ..1****.....
334 // C .......2****
335 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
336 const std::vector<Turn> timing = {
337 {"A", "t1000", 0},
338 {"B", "t500", -800},
339 {"C", "t500", 0},
340 };
341 auto mock_wavreader_factory = CreateMockWavReaderFactory();
342
343 // There are two unique audio tracks to read.
344 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
345
346 conversational_speech::MultiEndCall multiend_call(
347 timing, audiotracks_path, std::move(mock_wavreader_factory));
348 EXPECT_TRUE(multiend_call.valid());
349
350 // Test.
351 EXPECT_EQ(3u, multiend_call.speaker_names().size());
352 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
353 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
354 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
355 }
356
357 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
358 // Reject:
359 // A 0*********
360 // B ..1****...
361 // C ....2****.
362 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
363 // not permitted).
364 const std::vector<Turn> timing = {
365 {"A", "t1000", 0},
366 {"B", "t500", -800},
367 {"C", "t500", -300},
368 };
369 auto mock_wavreader_factory = CreateMockWavReaderFactory();
370
371 // There are two unique audio tracks to read.
372 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
373
374 conversational_speech::MultiEndCall multiend_call(
375 timing, audiotracks_path, std::move(mock_wavreader_factory));
376 EXPECT_FALSE(multiend_call.valid());
377 }
378
379 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
380 // Accept:
381 // A 0*********..
382 // B .1****......
383 // C .......2****
384 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
385 const std::vector<Turn> timing = {
386 {"A", "t1000", 0},
387 {"B", "t500", -900},
388 {"C", "t500", 100},
389 };
390 auto mock_wavreader_factory = CreateMockWavReaderFactory();
391
392 // There are two unique audio tracks to read.
393 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
394
395 conversational_speech::MultiEndCall multiend_call(
396 timing, audiotracks_path, std::move(mock_wavreader_factory));
397 EXPECT_TRUE(multiend_call.valid());
398
399 // Test.
400 EXPECT_EQ(3u, multiend_call.speaker_names().size());
401 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
402 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
403 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
404 }
405
406 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
407 // Accept:
408 // A 0****
409 // B 1****
410 const std::vector<Turn> timing = {
411 {"A", "t500", 0},
412 {"B", "t500", -500},
413 };
414 auto mock_wavreader_factory = CreateMockWavReaderFactory();
415
416 // There is one unique audio track to read.
417 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
418
419 conversational_speech::MultiEndCall multiend_call(
420 timing, audiotracks_path, std::move(mock_wavreader_factory));
421 EXPECT_TRUE(multiend_call.valid());
422
423 // Test.
424 EXPECT_EQ(2u, multiend_call.speaker_names().size());
425 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
426 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
427 }
428
429 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
430 // Accept:
431 // A 0****....3****.5**.
432 // B .....1****...4**...
433 // C ......2**.......6**
434 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
435 const std::vector<Turn> timing = {
436 {"A", "t500", 0},
437 {"B", "t500", 0},
438 {"C", "t300", -400},
439 {"A", "t500", 0},
440 {"B", "t300", -100},
441 {"A", "t300", -100},
442 {"C", "t300", -200},
443 };
80 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( 444 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
81 new MockWavReaderFactory()); 445 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
82 446 kDefaultMockWavReaderFactoryParamsMap));
83 // There are 5 unique audio tracks to read. 447
84 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); 448 // There are two unique audio tracks to read.
85 449 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
86 // Inject the mock wav reader factory. 450
87 conversational_speech::MultiEndCall multiend_call( 451 conversational_speech::MultiEndCall multiend_call(
88 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 452 timing, audiotracks_path, std::move(mock_wavreader_factory));
89 453 EXPECT_TRUE(multiend_call.valid());
90 // Test. 454
91 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 455 // Test.
92 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 456 EXPECT_EQ(3u, multiend_call.speaker_names().size());
457 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
458 EXPECT_EQ(7u, multiend_call.speaking_turns().size());
459 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
460 }
461
462 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
463 // Reject:
464 // A 0****....3****5**.
465 // B .....1****...4**..
466 // C ......2**......6**
467 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
468 // speakers not permitted).
469 const std::vector<Turn> timing = {
470 {"A", "t500", 0},
471 {"B", "t500", 0},
472 {"C", "t300", -400},
473 {"A", "t500", 0},
474 {"B", "t300", -100},
475 {"A", "t300", -200},
476 {"C", "t300", -200},
477 };
478 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
479 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
480 kDefaultMockWavReaderFactoryParamsMap));
481
482 // There are two unique audio tracks to read.
483 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
484
485 conversational_speech::MultiEndCall multiend_call(
486 timing, audiotracks_path, std::move(mock_wavreader_factory));
487 EXPECT_FALSE(multiend_call.valid());
93 } 488 }
94 489
95 } // namespace test 490 } // namespace test
96 } // namespace webrtc 491 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698