Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2808053002: Conversational Speech tool completed (Closed)
Patch Set: merge Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 // This file consists of unit tests for webrtc::test::conversational_speech 11 // This file consists of unit tests for webrtc::test::conversational_speech
12 // members. Some of them focus on accepting or rejecting different 12 // members. Some of them focus on accepting or rejecting different
13 // conversational speech setups. A setup is defined by a set of audio tracks and 13 // conversational speech setups. A setup is defined by a set of audio tracks and
14 // timing information. 14 // timing information.
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest, 15 // The docstring at the beginning of each TEST(ConversationalSpeechTest,
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which 16 // MultiEndCallSetup*) function looks like the drawing below and indicates which
17 // setup is tested. 17 // setup is tested.
18 // 18 //
19 // Accept: 19 // Accept:
20 // A 0****..... 20 // A 0****.....
21 // B .....1**** 21 // B .....1****
22 // 22 //
23 // The drawing indicates the following: 23 // The drawing indicates the following:
24 // - the illustrated setup should be accepted, 24 // - the illustrated setup should be accepted,
25 // - there are two speakers (namely, A and B), 25 // - there are two speakers (namely, A and B),
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 } 167 }
168 } 168 }
169 // TODO(alessiob): Wrap with EXPECT_TRUE() once bug webrtc:7769 is fixed. 169 // TODO(alessiob): Wrap with EXPECT_TRUE() once bug webrtc:7769 is fixed.
170 RemoveDir(dir); 170 RemoveDir(dir);
171 } 171 }
172 172
173 } // namespace 173 } // namespace
174 174
175 using testing::_; 175 using testing::_;
176 176
177 // TODO(alessiob): Remove fixture once conversational_speech fully implemented 177 TEST(ConversationalSpeechTest, Settings) {
178 // and replace TEST_F with TEST.
179 class ConversationalSpeechTest : public testing::Test {
180 public:
181 ConversationalSpeechTest() {
182 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
183 }
184 };
185
186 TEST_F(ConversationalSpeechTest, Settings) {
187 const conversational_speech::Config config( 178 const conversational_speech::Config config(
188 audiotracks_path, timing_filepath, output_path); 179 audiotracks_path, timing_filepath, output_path);
189 180
190 // Test getters. 181 // Test getters.
191 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); 182 EXPECT_EQ(audiotracks_path, config.audiotracks_path());
192 EXPECT_EQ(timing_filepath, config.timing_filepath()); 183 EXPECT_EQ(timing_filepath, config.timing_filepath());
193 EXPECT_EQ(output_path, config.output_path()); 184 EXPECT_EQ(output_path, config.output_path());
194 } 185 }
195 186
196 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { 187 TEST(ConversationalSpeechTest, TimingSaveLoad) {
197 // Save test timing. 188 // Save test timing.
198 const std::string temporary_filepath = TempFilename( 189 const std::string temporary_filepath = TempFilename(
199 OutputPath(), "TempTimingTestFile"); 190 OutputPath(), "TempTimingTestFile");
200 SaveTiming(temporary_filepath, expected_timing); 191 SaveTiming(temporary_filepath, expected_timing);
201 192
202 // Create a std::vector<Turn> instance by loading from file. 193 // Create a std::vector<Turn> instance by loading from file.
203 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); 194 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
204 std::remove(temporary_filepath.c_str()); 195 std::remove(temporary_filepath.c_str());
205 196
206 // Check size. 197 // Check size.
207 EXPECT_EQ(expected_timing.size(), actual_timing.size()); 198 EXPECT_EQ(expected_timing.size(), actual_timing.size());
208 199
209 // Check Turn instances. 200 // Check Turn instances.
210 for (size_t index = 0; index < expected_timing.size(); ++index) { 201 for (size_t index = 0; index < expected_timing.size(); ++index) {
211 EXPECT_EQ(expected_timing[index], actual_timing[index]) 202 EXPECT_EQ(expected_timing[index], actual_timing[index])
212 << "turn #" << index << " not matching"; 203 << "turn #" << index << " not matching";
213 } 204 }
214 } 205 }
215 206
216 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { 207 TEST(ConversationalSpeechTest, MultiEndCallCreate) {
217 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 208 auto mock_wavreader_factory = CreateMockWavReaderFactory();
218 209
219 // There are 5 unique audio tracks to read. 210 // There are 5 unique audio tracks to read.
220 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5); 211 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
221 212
222 // Inject the mock wav reader factory. 213 // Inject the mock wav reader factory.
223 conversational_speech::MultiEndCall multiend_call( 214 conversational_speech::MultiEndCall multiend_call(
224 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 215 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
225 EXPECT_TRUE(multiend_call.valid()); 216 EXPECT_TRUE(multiend_call.valid());
226 217
227 // Test. 218 // Test.
228 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 219 EXPECT_EQ(2u, multiend_call.speaker_names().size());
229 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 220 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
230 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); 221 EXPECT_EQ(6u, multiend_call.speaking_turns().size());
231 } 222 }
232 223
233 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { 224 TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
234 const std::vector<Turn> timing = { 225 const std::vector<Turn> timing = {
235 {"A", "sr8000", 0}, 226 {"A", "sr8000", 0},
236 {"B", "sr16000", 0}, 227 {"B", "sr16000", 0},
237 }; 228 };
238 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 229 auto mock_wavreader_factory = CreateMockWavReaderFactory();
239 230
240 // There are two unique audio tracks to read. 231 // There are two unique audio tracks to read.
241 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); 232 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
242 233
243 MultiEndCall multiend_call( 234 MultiEndCall multiend_call(
244 timing, audiotracks_path, std::move(mock_wavreader_factory)); 235 timing, audiotracks_path, std::move(mock_wavreader_factory));
245 EXPECT_FALSE(multiend_call.valid()); 236 EXPECT_FALSE(multiend_call.valid());
246 } 237 }
247 238
248 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { 239 TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
249 const std::vector<Turn> timing = { 240 const std::vector<Turn> timing = {
250 {"A", "sr16000_stereo", 0}, 241 {"A", "sr16000_stereo", 0},
251 {"B", "sr16000_stereo", 0}, 242 {"B", "sr16000_stereo", 0},
252 }; 243 };
253 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 244 auto mock_wavreader_factory = CreateMockWavReaderFactory();
254 245
255 // There is one unique audio track to read. 246 // There is one unique audio track to read.
256 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); 247 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
257 248
258 MultiEndCall multiend_call( 249 MultiEndCall multiend_call(
259 timing, audiotracks_path, std::move(mock_wavreader_factory)); 250 timing, audiotracks_path, std::move(mock_wavreader_factory));
260 EXPECT_FALSE(multiend_call.valid()); 251 EXPECT_FALSE(multiend_call.valid());
261 } 252 }
262 253
263 TEST_F(ConversationalSpeechTest, 254 TEST(ConversationalSpeechTest,
264 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { 255 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
265 const std::vector<Turn> timing = { 256 const std::vector<Turn> timing = {
266 {"A", "sr8000", 0}, 257 {"A", "sr8000", 0},
267 {"B", "sr16000_stereo", 0}, 258 {"B", "sr16000_stereo", 0},
268 }; 259 };
269 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 260 auto mock_wavreader_factory = CreateMockWavReaderFactory();
270 261
271 // There are two unique audio tracks to read. 262 // There are two unique audio tracks to read.
272 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); 263 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
273 264
274 MultiEndCall multiend_call( 265 MultiEndCall multiend_call(
275 timing, audiotracks_path, std::move(mock_wavreader_factory)); 266 timing, audiotracks_path, std::move(mock_wavreader_factory));
276 EXPECT_FALSE(multiend_call.valid()); 267 EXPECT_FALSE(multiend_call.valid());
277 } 268 }
278 269
279 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { 270 TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
280 const std::vector<Turn> timing = { 271 const std::vector<Turn> timing = {
281 {"A", "t500", -100}, 272 {"A", "t500", -100},
282 {"B", "t500", 0}, 273 {"B", "t500", 0},
283 }; 274 };
284 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 275 auto mock_wavreader_factory = CreateMockWavReaderFactory();
285 276
286 // There is one unique audio track to read. 277 // There is one unique audio track to read.
287 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 278 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
288 279
289 conversational_speech::MultiEndCall multiend_call( 280 conversational_speech::MultiEndCall multiend_call(
290 timing, audiotracks_path, std::move(mock_wavreader_factory)); 281 timing, audiotracks_path, std::move(mock_wavreader_factory));
291 EXPECT_FALSE(multiend_call.valid()); 282 EXPECT_FALSE(multiend_call.valid());
292 } 283 }
293 284
294 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { 285 TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
295 // Accept: 286 // Accept:
296 // A 0****..... 287 // A 0****.....
297 // B .....1**** 288 // B .....1****
298 constexpr std::size_t expected_duration = kDefaultSampleRate; 289 constexpr std::size_t expected_duration = kDefaultSampleRate;
299 const std::vector<Turn> timing = { 290 const std::vector<Turn> timing = {
300 {"A", "t500", 0}, 291 {"A", "t500", 0},
301 {"B", "t500", 0}, 292 {"B", "t500", 0},
302 }; 293 };
303 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 294 auto mock_wavreader_factory = CreateMockWavReaderFactory();
304 295
305 // There is one unique audio track to read. 296 // There is one unique audio track to read.
306 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 297 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
307 298
308 conversational_speech::MultiEndCall multiend_call( 299 conversational_speech::MultiEndCall multiend_call(
309 timing, audiotracks_path, std::move(mock_wavreader_factory)); 300 timing, audiotracks_path, std::move(mock_wavreader_factory));
310 EXPECT_TRUE(multiend_call.valid()); 301 EXPECT_TRUE(multiend_call.valid());
311 302
312 // Test. 303 // Test.
313 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 304 EXPECT_EQ(2u, multiend_call.speaker_names().size());
314 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 305 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
315 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 306 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
316 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 307 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
317 } 308 }
318 309
319 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { 310 TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
320 // Accept: 311 // Accept:
321 // A 0****....... 312 // A 0****.......
322 // B .......1**** 313 // B .......1****
323 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 314 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
324 const std::vector<Turn> timing = { 315 const std::vector<Turn> timing = {
325 {"A", "t500", 0}, 316 {"A", "t500", 0},
326 {"B", "t500", 200}, 317 {"B", "t500", 200},
327 }; 318 };
328 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 319 auto mock_wavreader_factory = CreateMockWavReaderFactory();
329 320
330 // There is one unique audio track to read. 321 // There is one unique audio track to read.
331 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 322 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
332 323
333 conversational_speech::MultiEndCall multiend_call( 324 conversational_speech::MultiEndCall multiend_call(
334 timing, audiotracks_path, std::move(mock_wavreader_factory)); 325 timing, audiotracks_path, std::move(mock_wavreader_factory));
335 EXPECT_TRUE(multiend_call.valid()); 326 EXPECT_TRUE(multiend_call.valid());
336 327
337 // Test. 328 // Test.
338 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 329 EXPECT_EQ(2u, multiend_call.speaker_names().size());
339 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 330 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
340 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 331 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
341 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 332 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
342 } 333 }
343 334
344 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { 335 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
345 // Accept: 336 // Accept:
346 // A 0****.... 337 // A 0****....
347 // B ....1**** 338 // B ....1****
348 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; 339 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
349 const std::vector<Turn> timing = { 340 const std::vector<Turn> timing = {
350 {"A", "t500", 0}, 341 {"A", "t500", 0},
351 {"B", "t500", -100}, 342 {"B", "t500", -100},
352 }; 343 };
353 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 344 auto mock_wavreader_factory = CreateMockWavReaderFactory();
354 345
355 // There is one unique audio track to read. 346 // There is one unique audio track to read.
356 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 347 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
357 348
358 conversational_speech::MultiEndCall multiend_call( 349 conversational_speech::MultiEndCall multiend_call(
359 timing, audiotracks_path, std::move(mock_wavreader_factory)); 350 timing, audiotracks_path, std::move(mock_wavreader_factory));
360 EXPECT_TRUE(multiend_call.valid()); 351 EXPECT_TRUE(multiend_call.valid());
361 352
362 // Test. 353 // Test.
363 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 354 EXPECT_EQ(2u, multiend_call.speaker_names().size());
364 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 355 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
365 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 356 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
366 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 357 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
367 } 358 }
368 359
369 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { 360 TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
370 // Reject: 361 // Reject:
371 // A ..0**** 362 // A ..0****
372 // B .1****. The n-th turn cannot start before the (n-1)-th one. 363 // B .1****. The n-th turn cannot start before the (n-1)-th one.
373 const std::vector<Turn> timing = { 364 const std::vector<Turn> timing = {
374 {"A", "t500", 200}, 365 {"A", "t500", 200},
375 {"B", "t500", -600}, 366 {"B", "t500", -600},
376 }; 367 };
377 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 368 auto mock_wavreader_factory = CreateMockWavReaderFactory();
378 369
379 // There is one unique audio track to read. 370 // There is one unique audio track to read.
380 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 371 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
381 372
382 conversational_speech::MultiEndCall multiend_call( 373 conversational_speech::MultiEndCall multiend_call(
383 timing, audiotracks_path, std::move(mock_wavreader_factory)); 374 timing, audiotracks_path, std::move(mock_wavreader_factory));
384 EXPECT_FALSE(multiend_call.valid()); 375 EXPECT_FALSE(multiend_call.valid());
385 } 376 }
386 377
387 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { 378 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
388 // Accept: 379 // Accept:
389 // A 0****2****... 380 // A 0****2****...
390 // B ...1********* 381 // B ...1*********
391 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; 382 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
392 const std::vector<Turn> timing = { 383 const std::vector<Turn> timing = {
393 {"A", "t500", 0}, 384 {"A", "t500", 0},
394 {"B", "t1000", -200}, 385 {"B", "t1000", -200},
395 {"A", "t500", -800}, 386 {"A", "t500", -800},
396 }; 387 };
397 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 388 auto mock_wavreader_factory = CreateMockWavReaderFactory();
398 389
399 // There are two unique audio tracks to read. 390 // There are two unique audio tracks to read.
400 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 391 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
401 392
402 conversational_speech::MultiEndCall multiend_call( 393 conversational_speech::MultiEndCall multiend_call(
403 timing, audiotracks_path, std::move(mock_wavreader_factory)); 394 timing, audiotracks_path, std::move(mock_wavreader_factory));
404 EXPECT_TRUE(multiend_call.valid()); 395 EXPECT_TRUE(multiend_call.valid());
405 396
406 // Test. 397 // Test.
407 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 398 EXPECT_EQ(2u, multiend_call.speaker_names().size());
408 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 399 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
409 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 400 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
410 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 401 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
411 } 402 }
412 403
413 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { 404 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
414 // Reject: 405 // Reject:
415 // A 0****...... 406 // A 0****......
416 // A ...1****... 407 // A ...1****...
417 // B ......2**** 408 // B ......2****
418 // ^ Turn #1 overlaps with #0 which is from the same speaker. 409 // ^ Turn #1 overlaps with #0 which is from the same speaker.
419 const std::vector<Turn> timing = { 410 const std::vector<Turn> timing = {
420 {"A", "t500", 0}, 411 {"A", "t500", 0},
421 {"A", "t500", -200}, 412 {"A", "t500", -200},
422 {"B", "t500", -200}, 413 {"B", "t500", -200},
423 }; 414 };
424 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 415 auto mock_wavreader_factory = CreateMockWavReaderFactory();
425 416
426 // There is one unique audio track to read. 417 // There is one unique audio track to read.
427 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 418 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
428 419
429 conversational_speech::MultiEndCall multiend_call( 420 conversational_speech::MultiEndCall multiend_call(
430 timing, audiotracks_path, std::move(mock_wavreader_factory)); 421 timing, audiotracks_path, std::move(mock_wavreader_factory));
431 EXPECT_FALSE(multiend_call.valid()); 422 EXPECT_FALSE(multiend_call.valid());
432 } 423 }
433 424
434 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { 425 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
435 // Reject: 426 // Reject:
436 // A 0********* 427 // A 0*********
437 // B 1**....... 428 // B 1**.......
438 // C ...2**.... 429 // C ...2**....
439 // A ......3**. 430 // A ......3**.
440 // ^ Turn #3 overlaps with #0 which is from the same speaker. 431 // ^ Turn #3 overlaps with #0 which is from the same speaker.
441 const std::vector<Turn> timing = { 432 const std::vector<Turn> timing = {
442 {"A", "t1000", 0}, 433 {"A", "t1000", 0},
443 {"B", "t300", -1000}, 434 {"B", "t300", -1000},
444 {"C", "t300", 0}, 435 {"C", "t300", 0},
445 {"A", "t300", 0}, 436 {"A", "t300", 0},
446 }; 437 };
447 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 438 auto mock_wavreader_factory = CreateMockWavReaderFactory();
448 439
449 // There are two unique audio tracks to read. 440 // There are two unique audio tracks to read.
450 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 441 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
451 442
452 conversational_speech::MultiEndCall multiend_call( 443 conversational_speech::MultiEndCall multiend_call(
453 timing, audiotracks_path, std::move(mock_wavreader_factory)); 444 timing, audiotracks_path, std::move(mock_wavreader_factory));
454 EXPECT_FALSE(multiend_call.valid()); 445 EXPECT_FALSE(multiend_call.valid());
455 } 446 }
456 447
457 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { 448 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
458 // Accept: 449 // Accept:
459 // A 0*********.. 450 // A 0*********..
460 // B ..1****..... 451 // B ..1****.....
461 // C .......2**** 452 // C .......2****
462 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 453 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
463 const std::vector<Turn> timing = { 454 const std::vector<Turn> timing = {
464 {"A", "t1000", 0}, 455 {"A", "t1000", 0},
465 {"B", "t500", -800}, 456 {"B", "t500", -800},
466 {"C", "t500", 0}, 457 {"C", "t500", 0},
467 }; 458 };
468 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 459 auto mock_wavreader_factory = CreateMockWavReaderFactory();
469 460
470 // There are two unique audio tracks to read. 461 // There are two unique audio tracks to read.
471 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 462 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
472 463
473 conversational_speech::MultiEndCall multiend_call( 464 conversational_speech::MultiEndCall multiend_call(
474 timing, audiotracks_path, std::move(mock_wavreader_factory)); 465 timing, audiotracks_path, std::move(mock_wavreader_factory));
475 EXPECT_TRUE(multiend_call.valid()); 466 EXPECT_TRUE(multiend_call.valid());
476 467
477 // Test. 468 // Test.
478 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 469 EXPECT_EQ(3u, multiend_call.speaker_names().size());
479 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 470 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
480 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 471 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
481 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 472 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
482 } 473 }
483 474
484 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { 475 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
485 // Reject: 476 // Reject:
486 // A 0********* 477 // A 0*********
487 // B ..1****... 478 // B ..1****...
488 // C ....2****. 479 // C ....2****.
489 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers 480 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
490 // not permitted). 481 // not permitted).
491 const std::vector<Turn> timing = { 482 const std::vector<Turn> timing = {
492 {"A", "t1000", 0}, 483 {"A", "t1000", 0},
493 {"B", "t500", -800}, 484 {"B", "t500", -800},
494 {"C", "t500", -300}, 485 {"C", "t500", -300},
495 }; 486 };
496 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 487 auto mock_wavreader_factory = CreateMockWavReaderFactory();
497 488
498 // There are two unique audio tracks to read. 489 // There are two unique audio tracks to read.
499 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 490 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
500 491
501 conversational_speech::MultiEndCall multiend_call( 492 conversational_speech::MultiEndCall multiend_call(
502 timing, audiotracks_path, std::move(mock_wavreader_factory)); 493 timing, audiotracks_path, std::move(mock_wavreader_factory));
503 EXPECT_FALSE(multiend_call.valid()); 494 EXPECT_FALSE(multiend_call.valid());
504 } 495 }
505 496
506 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { 497 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
507 // Accept: 498 // Accept:
508 // A 0*********.. 499 // A 0*********..
509 // B .1****...... 500 // B .1****......
510 // C .......2**** 501 // C .......2****
511 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 502 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
512 const std::vector<Turn> timing = { 503 const std::vector<Turn> timing = {
513 {"A", "t1000", 0}, 504 {"A", "t1000", 0},
514 {"B", "t500", -900}, 505 {"B", "t500", -900},
515 {"C", "t500", 100}, 506 {"C", "t500", 100},
516 }; 507 };
517 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 508 auto mock_wavreader_factory = CreateMockWavReaderFactory();
518 509
519 // There are two unique audio tracks to read. 510 // There are two unique audio tracks to read.
520 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 511 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
521 512
522 conversational_speech::MultiEndCall multiend_call( 513 conversational_speech::MultiEndCall multiend_call(
523 timing, audiotracks_path, std::move(mock_wavreader_factory)); 514 timing, audiotracks_path, std::move(mock_wavreader_factory));
524 EXPECT_TRUE(multiend_call.valid()); 515 EXPECT_TRUE(multiend_call.valid());
525 516
526 // Test. 517 // Test.
527 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 518 EXPECT_EQ(3u, multiend_call.speaker_names().size());
528 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 519 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
529 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 520 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
530 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 521 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
531 } 522 }
532 523
533 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { 524 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
534 // Accept: 525 // Accept:
535 // A 0**** 526 // A 0****
536 // B 1**** 527 // B 1****
537 const std::vector<Turn> timing = { 528 const std::vector<Turn> timing = {
538 {"A", "t500", 0}, 529 {"A", "t500", 0},
539 {"B", "t500", -500}, 530 {"B", "t500", -500},
540 }; 531 };
541 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 532 auto mock_wavreader_factory = CreateMockWavReaderFactory();
542 533
543 // There is one unique audio track to read. 534 // There is one unique audio track to read.
544 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 535 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
545 536
546 conversational_speech::MultiEndCall multiend_call( 537 conversational_speech::MultiEndCall multiend_call(
547 timing, audiotracks_path, std::move(mock_wavreader_factory)); 538 timing, audiotracks_path, std::move(mock_wavreader_factory));
548 EXPECT_TRUE(multiend_call.valid()); 539 EXPECT_TRUE(multiend_call.valid());
549 540
550 // Test. 541 // Test.
551 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 542 EXPECT_EQ(2u, multiend_call.speaker_names().size());
552 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 543 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
553 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 544 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
554 } 545 }
555 546
556 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { 547 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
557 // Accept: 548 // Accept:
558 // A 0****....3****.5**. 549 // A 0****....3****.5**.
559 // B .....1****...4**... 550 // B .....1****...4**...
560 // C ......2**.......6**.. 551 // C ......2**.......6**..
561 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9; 552 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
562 const std::vector<Turn> timing = { 553 const std::vector<Turn> timing = {
563 {"A", "t500", 0}, 554 {"A", "t500", 0},
564 {"B", "t500", 0}, 555 {"B", "t500", 0},
565 {"C", "t300", -400}, 556 {"C", "t300", -400},
566 {"A", "t500", 0}, 557 {"A", "t500", 0},
(...skipping 12 matching lines...) Expand all
579 timing, audiotracks_path, std::move(mock_wavreader_factory)); 570 timing, audiotracks_path, std::move(mock_wavreader_factory));
580 EXPECT_TRUE(multiend_call.valid()); 571 EXPECT_TRUE(multiend_call.valid());
581 572
582 // Test. 573 // Test.
583 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 574 EXPECT_EQ(3u, multiend_call.speaker_names().size());
584 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 575 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
585 EXPECT_EQ(7u, multiend_call.speaking_turns().size()); 576 EXPECT_EQ(7u, multiend_call.speaking_turns().size());
586 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 577 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
587 } 578 }
588 579
589 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { 580 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
590 // Reject: 581 // Reject:
591 // A 0****....3****.6** 582 // A 0****....3****.6**
592 // B .....1****...4**.. 583 // B .....1****...4**..
593 // C ......2**.....5**.. 584 // C ......2**.....5**..
594 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ 585 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
595 // speakers not permitted). 586 // speakers not permitted).
596 const std::vector<Turn> timing = { 587 const std::vector<Turn> timing = {
597 {"A", "t500", 0}, 588 {"A", "t500", 0},
598 {"B", "t500", 0}, 589 {"B", "t500", 0},
599 {"C", "t300", -400}, 590 {"C", "t300", -400},
600 {"A", "t500", 0}, 591 {"A", "t500", 0},
601 {"B", "t300", -100}, 592 {"B", "t300", -100},
602 {"A", "t300", -200}, 593 {"A", "t300", -200},
603 {"C", "t300", -200}, 594 {"C", "t300", -200},
604 }; 595 };
605 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( 596 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
606 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, 597 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
607 kDefaultMockWavReaderFactoryParamsMap)); 598 kDefaultMockWavReaderFactoryParamsMap));
608 599
609 // There are two unique audio tracks to read. 600 // There are two unique audio tracks to read.
610 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 601 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
611 602
612 conversational_speech::MultiEndCall multiend_call( 603 conversational_speech::MultiEndCall multiend_call(
613 timing, audiotracks_path, std::move(mock_wavreader_factory)); 604 timing, audiotracks_path, std::move(mock_wavreader_factory));
614 EXPECT_FALSE(multiend_call.valid()); 605 EXPECT_FALSE(multiend_call.valid());
615 } 606 }
616 607
617 TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { 608 TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
618 // Parameters with which wav files are created. 609 // Parameters with which wav files are created.
619 constexpr int duration_seconds = 5; 610 constexpr int duration_seconds = 5;
620 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; 611 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
621 612
622 for (int sample_rate : sample_rates) { 613 for (int sample_rate : sample_rates) {
623 const rtc::Pathname temp_filename( 614 const rtc::Pathname temp_filename(
624 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) 615 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)
625 + ".wav"); 616 + ".wav");
626 617
627 // Write wav file. 618 // Write wav file.
628 const std::size_t num_samples = duration_seconds * sample_rate; 619 const std::size_t num_samples = duration_seconds * sample_rate;
629 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; 620 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
630 CreateSineWavFile(temp_filename.pathname(), params); 621 CreateSineWavFile(temp_filename.pathname(), params);
631 622
632 // Load wav file and check if params match. 623 // Load wav file and check if params match.
633 WavReaderFactory wav_reader_factory; 624 WavReaderFactory wav_reader_factory;
634 MockWavReaderFactory::Params expeted_params = { 625 MockWavReaderFactory::Params expeted_params = {
635 sample_rate, 1u, num_samples}; 626 sample_rate, 1u, num_samples};
636 CheckAudioTrackParams( 627 CheckAudioTrackParams(
637 wav_reader_factory, temp_filename.pathname(), expeted_params); 628 wav_reader_factory, temp_filename.pathname(), expeted_params);
638 629
639 // Clean up. 630 // Clean up.
640 remove(temp_filename.pathname().c_str()); 631 remove(temp_filename.pathname().c_str());
641 } 632 }
642 } 633 }
643 634
644 TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) { 635 TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
645 // Simulated call (one character corresponding to 500 ms): 636 // Simulated call (one character corresponding to 500 ms):
646 // A 0*********...........2*********..... 637 // A 0*********...........2*********.....
647 // B ...........1*********.....3********* 638 // B ...........1*********.....3*********
648 const std::vector<Turn> expected_timing = { 639 const std::vector<Turn> expected_timing = {
649 {"A", "t5000_440.wav", 0}, 640 {"A", "t5000_440.wav", 0},
650 {"B", "t5000_880.wav", 500}, 641 {"B", "t5000_880.wav", 500},
651 {"A", "t5000_440.wav", 0}, 642 {"A", "t5000_440.wav", 0},
652 {"B", "t5000_880.wav", -2500}, 643 {"B", "t5000_880.wav", -2500},
653 }; 644 };
654 const std::size_t expected_duration_seconds = 18; 645 const std::size_t expected_duration_seconds = 18;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
688 CheckAudioTrackParams( 679 CheckAudioTrackParams(
689 wav_reader_factory, it.second.far_end, expeted_params); 680 wav_reader_factory, it.second.far_end, expeted_params);
690 } 681 }
691 682
692 // Clean. 683 // Clean.
693 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path)); 684 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));
694 } 685 }
695 686
696 } // namespace test 687 } // namespace test
697 } // namespace webrtc 688 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698