Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(418)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: rebase Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 // This file consists of unit tests for webrtc::test::conversational_speech
12 // members. Part of the tests focus on accepting or rejecting different
13 // conversational speech setups. A setup is defined by a set of audio tracks and
14 // timing information.
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest,
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which
17 // setup is tested.
18 //
19 // Accept:
20 // A 0****.....
21 // B .....1****
22 //
23 // The drawing indicates the following:
24 // - the illustrated setup should be accepted,
25 // - there are two speakers (namely, A and B),
26 // - A is the first speaker, B is the second one,
27 // - each character after the speaker's letter indicates a time unit (e.g., 100
28 // ms),
29 // - "*" indicates speaking, "." listening,
30 // - numbers indicate the turn index in std::vector<Turn>.
31 //
32 // Note that the same speaker can appear in multiple lines in order to depict
33 // cases in which there are wrong offsets leading to self cross-talk (which is
34 // rejected).
35
11 #include <stdio.h> 36 #include <stdio.h>
37 #include <map>
12 #include <memory> 38 #include <memory>
13 39
40 #include "webrtc/base/logging.h"
14 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" 41 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
15 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" 42 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
16 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 43 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
17 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" 44 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
18 #include "webrtc/test/gmock.h" 45 #include "webrtc/test/gmock.h"
19 #include "webrtc/test/gtest.h" 46 #include "webrtc/test/gtest.h"
20 #include "webrtc/test/testsupport/fileutils.h" 47 #include "webrtc/test/testsupport/fileutils.h"
21 48
22 namespace webrtc { 49 namespace webrtc {
23 namespace test { 50 namespace test {
(...skipping 13 matching lines...) Expand all
37 const std::vector<Turn> expected_timing = { 64 const std::vector<Turn> expected_timing = {
38 {"A", "a1", 0}, 65 {"A", "a1", 0},
39 {"B", "b1", 0}, 66 {"B", "b1", 0},
40 {"A", "a2", 100}, 67 {"A", "a2", 100},
41 {"B", "b2", -200}, 68 {"B", "b2", -200},
42 {"A", "a3", 0}, 69 {"A", "a3", 0},
43 {"A", "a3", 0}, 70 {"A", "a3", 0},
44 }; 71 };
45 const std::size_t kNumberOfTurns = expected_timing.size(); 72 const std::size_t kNumberOfTurns = expected_timing.size();
46 73
74 // Fake audio track parameters.
75 const MockWavReaderFactory::Params kMockWavReaderFactoryParams300ms =
76 {48000, 1u, 14400u}; // 48kHz sample rate, mono, 0.3 seconds.
77 const MockWavReaderFactory::Params kMockWavReaderFactoryParams500ms =
78 {48000, 1u, 24000u}; // 48kHz sample rate, mono, 0.5 seconds.
79 const MockWavReaderFactory::Params kMockWavReaderFactoryParams1000ms =
80 {48000, 1u, 48000u}; // 48kHz sample rate, mono, 1 second.
81
82 // Default arguments for MockWavReaderFactory ctor.
83 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
84 kMockWavReaderFactoryParams500ms;
85 const std::map<std::string, const MockWavReaderFactory::Params>
86 kDefaultMockWavReaderFactoryParamsMap = {
87 {"t300", kMockWavReaderFactoryParams300ms},
88 {"t500", kMockWavReaderFactoryParams500ms},
89 {"t1000", kMockWavReaderFactoryParams1000ms},
AleBzk 2017/03/28 13:11:10 t300, t500 and t1000 will be used as fake audio tr
90 };
91
92 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
93 return std::unique_ptr<MockWavReaderFactory>(
94 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
95 kDefaultMockWavReaderFactoryParamsMap));
96 }
97
47 } // namespace 98 } // namespace
48 99
49 TEST(ConversationalSpeechTest, Settings) { 100 class ConversationalSpeechTest : public testing::Test {
101 public:
102 ConversationalSpeechTest() {
103 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
104 }
105 };
106
107 TEST_F(ConversationalSpeechTest, Settings) {
50 const conversational_speech::Config config( 108 const conversational_speech::Config config(
51 audiotracks_path, timing_filepath, output_path); 109 audiotracks_path, timing_filepath, output_path);
52 110
53 // Test getters. 111 // Test getters.
54 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); 112 EXPECT_EQ(audiotracks_path, config.audiotracks_path());
55 EXPECT_EQ(timing_filepath, config.timing_filepath()); 113 EXPECT_EQ(timing_filepath, config.timing_filepath());
56 EXPECT_EQ(output_path, config.output_path()); 114 EXPECT_EQ(output_path, config.output_path());
57 } 115 }
58 116
59 TEST(ConversationalSpeechTest, TimingSaveLoad) { 117 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
60 // Save test timing. 118 // Save test timing.
61 const std::string temporary_filepath = webrtc::test::TempFilename( 119 const std::string temporary_filepath = webrtc::test::TempFilename(
62 webrtc::test::OutputPath(), "TempTimingTestFile"); 120 webrtc::test::OutputPath(), "TempTimingTestFile");
63 SaveTiming(temporary_filepath, expected_timing); 121 SaveTiming(temporary_filepath, expected_timing);
64 122
65 // Create a std::vector<Turn> instance by loading from file. 123 // Create a std::vector<Turn> instance by loading from file.
66 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); 124 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
67 std::remove(temporary_filepath.c_str()); 125 std::remove(temporary_filepath.c_str());
68 126
69 // Check size. 127 // Check size.
70 EXPECT_EQ(expected_timing.size(), actual_timing.size()); 128 EXPECT_EQ(expected_timing.size(), actual_timing.size());
71 129
72 // Check Turn instances. 130 // Check Turn instances.
73 for (size_t index = 0; index < expected_timing.size(); ++index) { 131 for (size_t index = 0; index < expected_timing.size(); ++index) {
74 EXPECT_EQ(expected_timing[index], actual_timing[index]) 132 EXPECT_EQ(expected_timing[index], actual_timing[index])
75 << "turn #" << index << " not matching"; 133 << "turn #" << index << " not matching";
76 } 134 }
77 } 135 }
78 136
79 TEST(ConversationalSpeechTest, MultiEndCallCreate) { 137 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) {
80 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( 138 auto mock_wavreader_factory = CreateMockWavReaderFactory();
81 new MockWavReaderFactory());
82 139
83 // There are 5 unique audio tracks to read. 140 // There are 5 unique audio tracks to read.
84 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); 141 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5);
85 142
86 // Inject the mock wav reader factory. 143 // Inject the mock wav reader factory.
87 conversational_speech::MultiEndCall multiend_call( 144 conversational_speech::MultiEndCall multiend_call(
88 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 145 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
146 EXPECT_TRUE(multiend_call.valid());
89 147
90 // Test. 148 // Test.
91 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 149 EXPECT_EQ(2u, multiend_call.speaker_names().size());
92 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 150 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
93 } 151 }
94 152
153 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) {
154 const std::vector<Turn> timing = {
155 {"A", "t500", -100},
156 {"B", "t500", 0},
157 };
158 auto mock_wavreader_factory = CreateMockWavReaderFactory();
159
160 // There is one unique audio track to read.
161 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
162
163 conversational_speech::MultiEndCall multiend_call(
164 timing, audiotracks_path, std::move(mock_wavreader_factory));
165 EXPECT_FALSE(multiend_call.valid());
166 }
167
168
169 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) {
170 // Accept:
171 // A 0****.....
172 // B .....1****
173 const std::vector<Turn> timing = {
174 {"A", "t500", 0},
175 {"B", "t500", 0},
176 };
177 auto mock_wavreader_factory = CreateMockWavReaderFactory();
178
179 // There is one unique audio track to read.
180 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
181
182 conversational_speech::MultiEndCall multiend_call(
183 timing, audiotracks_path, std::move(mock_wavreader_factory));
184 EXPECT_TRUE(multiend_call.valid());
185 }
186
187 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) {
188 // Accept:
189 // A 0****.......
190 // B .......1****
191 const std::vector<Turn> timing = {
192 {"A", "t500", 0},
193 {"B", "t500", 200},
194 };
195 auto mock_wavreader_factory = CreateMockWavReaderFactory();
196
197 // There is one unique audio track to read.
198 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
199
200 conversational_speech::MultiEndCall multiend_call(
201 timing, audiotracks_path, std::move(mock_wavreader_factory));
202 EXPECT_TRUE(multiend_call.valid());
203 }
204
205 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
206 // Accept:
207 // A 0****....
208 // B ....1****
209 const std::vector<Turn> timing = {
210 {"A", "t500", 0},
211 {"B", "t500", -100},
212 };
213 auto mock_wavreader_factory = CreateMockWavReaderFactory();
214
215 // There is one unique audio track to read.
216 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
217
218 conversational_speech::MultiEndCall multiend_call(
219 timing, audiotracks_path, std::move(mock_wavreader_factory));
220 EXPECT_TRUE(multiend_call.valid());
221 }
222
223 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
224 // Reject:
225 // A ..0****
226 // B .1****. The n-th turn cannot start before the (n-1)-th one.
227 const std::vector<Turn> timing = {
228 {"A", "t500", 200},
229 {"B", "t500", -600},
230 };
231 auto mock_wavreader_factory = CreateMockWavReaderFactory();
232
233 // There is one unique audio track to read.
234 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
235
236 conversational_speech::MultiEndCall multiend_call(
237 timing, audiotracks_path, std::move(mock_wavreader_factory));
238 EXPECT_FALSE(multiend_call.valid());
239 }
240
241 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
242 // Accept:
243 // A 0****2****...
244 // B ...1*********
245 const std::vector<Turn> timing = {
246 {"A", "t500", 0},
247 {"B", "t1000", -200},
248 {"A", "t500", -800},
249 };
250 auto mock_wavreader_factory = CreateMockWavReaderFactory();
251
252 // There are two unique audio tracks to read.
253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
254
255 conversational_speech::MultiEndCall multiend_call(
256 timing, audiotracks_path, std::move(mock_wavreader_factory));
257 EXPECT_TRUE(multiend_call.valid());
258 }
259
260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
261 // Reject:
262 // A 0****......
263 // A ...1****...
264 // B ......2****
265 // ^ Turn #1 overlaps with #0 which is from the same speaker.
266 const std::vector<Turn> timing = {
267 {"A", "t500", 0},
268 {"A", "t500", -200},
269 {"B", "t500", -200},
270 };
271 auto mock_wavreader_factory = CreateMockWavReaderFactory();
272
273 // There is one unique audio track to read.
274 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
275
276 conversational_speech::MultiEndCall multiend_call(
277 timing, audiotracks_path, std::move(mock_wavreader_factory));
278 EXPECT_FALSE(multiend_call.valid());
279 }
280
281 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
282 // Reject:
283 // A 0*********
284 // B 1**.......
285 // C ...2**....
286 // A ......3**.
287 // ^ Turn #3 overlaps with #0 which is from the same speaker.
288 const std::vector<Turn> timing = {
289 {"A", "t1000", 0},
290 {"B", "t300", -1000},
291 {"C", "t300", 0},
292 {"A", "t300", 0},
293 };
294 auto mock_wavreader_factory = CreateMockWavReaderFactory();
295
296 // There are two unique audio tracks to read.
297 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
298
299 conversational_speech::MultiEndCall multiend_call(
300 timing, audiotracks_path, std::move(mock_wavreader_factory));
301 EXPECT_FALSE(multiend_call.valid());
302 }
303
304 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
305 // Accept:
306 // A 0*********..
307 // B ..1****.....
308 // C .......2****
309 const std::vector<Turn> timing = {
310 {"A", "t1000", 0},
311 {"B", "t500", -800},
312 {"C", "t500", 0},
313 };
314 auto mock_wavreader_factory = CreateMockWavReaderFactory();
315
316 // There are two unique audio tracks to read.
317 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
318
319 conversational_speech::MultiEndCall multiend_call(
320 timing, audiotracks_path, std::move(mock_wavreader_factory));
321 EXPECT_TRUE(multiend_call.valid());
322 }
323
324 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
325 // Reject:
326 // A 0*********
327 // B ..1****...
328 // C ....2****.
329 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
330 // not permitted).
331 const std::vector<Turn> timing = {
332 {"A", "t1000", 0},
333 {"B", "t500", -800},
334 {"C", "t500", -300},
335 };
336 auto mock_wavreader_factory = CreateMockWavReaderFactory();
337
338 // There are two unique audio tracks to read.
339 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
340
341 conversational_speech::MultiEndCall multiend_call(
342 timing, audiotracks_path, std::move(mock_wavreader_factory));
343 EXPECT_FALSE(multiend_call.valid());
344 }
345
346 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
347 // Accept:
348 // A 0*********..
349 // B .1****......
350 // C .......2****
351 const std::vector<Turn> timing = {
352 {"A", "t1000", 0},
353 {"B", "t500", -900},
354 {"C", "t500", 100},
355 };
356 auto mock_wavreader_factory = CreateMockWavReaderFactory();
357
358 // There are two unique audio tracks to read.
359 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
360
361 conversational_speech::MultiEndCall multiend_call(
362 timing, audiotracks_path, std::move(mock_wavreader_factory));
363 EXPECT_TRUE(multiend_call.valid());
364 }
365
366 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
367 // Accept:
368 // A 0****
369 // B 1****
370 const std::vector<Turn> timing = {
371 {"A", "t500", 0},
372 {"B", "t500", -500},
373 };
374 auto mock_wavreader_factory = CreateMockWavReaderFactory();
375
376 // There is one unique audio track to read.
377 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
378
379 conversational_speech::MultiEndCall multiend_call(
380 timing, audiotracks_path, std::move(mock_wavreader_factory));
381 EXPECT_TRUE(multiend_call.valid());
382 }
383
384 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
385 // Accept:
386 // A 0****....3****.5**...
387 // B .....1****...4**.....
388 // C ......2**.......6**..
389 const std::vector<Turn> timing = {
390 {"A", "t500", 0},
391 {"B", "t500", 0},
392 {"C", "t300", -400},
393 {"A", "t500", 0},
394 {"B", "t300", -100},
395 {"A", "t300", -100},
396 {"C", "t300", -200},
397 };
398 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
399 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
400 kDefaultMockWavReaderFactoryParamsMap));
401
402 // There are two unique audio tracks to read.
403 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
404
405 conversational_speech::MultiEndCall multiend_call(
406 timing, audiotracks_path, std::move(mock_wavreader_factory));
407 EXPECT_TRUE(multiend_call.valid());
408 }
409
410 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
411 // Reject:
412 // A 0****....3****5**.
413 // B .....1****...4**..
414 // C ......2**......6**
415 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
416 // speakers not permitted).
417 const std::vector<Turn> timing = {
418 {"A", "t500", 0},
419 {"B", "t500", 0},
420 {"C", "t300", -400},
421 {"A", "t500", 0},
422 {"B", "t300", -100},
423 {"A", "t300", -200},
424 {"C", "t300", -200},
425 };
426 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
427 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
428 kDefaultMockWavReaderFactoryParamsMap));
429
430 // There are two unique audio tracks to read.
431 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
432
433 conversational_speech::MultiEndCall multiend_call(
434 timing, audiotracks_path, std::move(mock_wavreader_factory));
435 EXPECT_FALSE(multiend_call.valid());
436 }
437
95 } // namespace test 438 } // namespace test
96 } // namespace webrtc 439 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698