Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2808053002: Conversational Speech tool completed (Closed)
Patch Set: output for the user added Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 // This file consists of unit tests for webrtc::test::conversational_speech 11 // This file consists of unit tests for webrtc::test::conversational_speech
12 // members. Some of them focus on accepting or rejecting different 12 // members. Some of them focus on accepting or rejecting different
13 // conversational speech setups. A setup is defined by a set of audio tracks and 13 // conversational speech setups. A setup is defined by a set of audio tracks and
14 // timing information. 14 // timing information.
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest, 15 // The docstring at the beginning of each TEST(ConversationalSpeechTest,
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which 16 // MultiEndCallSetup*) function looks like the drawing below and indicates which
17 // setup is tested. 17 // setup is tested.
18 // 18 //
19 // Accept: 19 // Accept:
20 // A 0****..... 20 // A 0****.....
21 // B .....1**** 21 // B .....1****
22 // 22 //
23 // The drawing indicates the following: 23 // The drawing indicates the following:
24 // - the illustrated setup should be accepted, 24 // - the illustrated setup should be accepted,
25 // - there are two speakers (namely, A and B), 25 // - there are two speakers (namely, A and B),
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 auto wav_reader = wav_reader_factory.Create(filepath); 149 auto wav_reader = wav_reader_factory.Create(filepath);
150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate()); 150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());
151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels()); 151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());
152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples()); 152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());
153 } 153 }
154 154
155 } // namespace 155 } // namespace
156 156
157 using testing::_; 157 using testing::_;
158 158
159 // TODO(alessiob): Remove fixture once conversational_speech fully implemented 159 TEST(ConversationalSpeechTest, Settings) {
160 // and replace TEST_F with TEST.
161 class ConversationalSpeechTest : public testing::Test {
162 public:
163 ConversationalSpeechTest() {
164 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
165 }
166 };
167
168 TEST_F(ConversationalSpeechTest, Settings) {
169 const conversational_speech::Config config( 160 const conversational_speech::Config config(
170 audiotracks_path, timing_filepath, output_path); 161 audiotracks_path, timing_filepath, output_path);
171 162
172 // Test getters. 163 // Test getters.
173 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); 164 EXPECT_EQ(audiotracks_path, config.audiotracks_path());
174 EXPECT_EQ(timing_filepath, config.timing_filepath()); 165 EXPECT_EQ(timing_filepath, config.timing_filepath());
175 EXPECT_EQ(output_path, config.output_path()); 166 EXPECT_EQ(output_path, config.output_path());
176 } 167 }
177 168
178 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { 169 TEST(ConversationalSpeechTest, TimingSaveLoad) {
179 // Save test timing. 170 // Save test timing.
180 const std::string temporary_filepath = webrtc::test::TempFilename( 171 const std::string temporary_filepath = webrtc::test::TempFilename(
181 webrtc::test::OutputPath(), "TempTimingTestFile"); 172 webrtc::test::OutputPath(), "TempTimingTestFile");
182 SaveTiming(temporary_filepath, expected_timing); 173 SaveTiming(temporary_filepath, expected_timing);
183 174
184 // Create a std::vector<Turn> instance by loading from file. 175 // Create a std::vector<Turn> instance by loading from file.
185 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); 176 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
186 std::remove(temporary_filepath.c_str()); 177 std::remove(temporary_filepath.c_str());
187 178
188 // Check size. 179 // Check size.
189 EXPECT_EQ(expected_timing.size(), actual_timing.size()); 180 EXPECT_EQ(expected_timing.size(), actual_timing.size());
190 181
191 // Check Turn instances. 182 // Check Turn instances.
192 for (size_t index = 0; index < expected_timing.size(); ++index) { 183 for (size_t index = 0; index < expected_timing.size(); ++index) {
193 EXPECT_EQ(expected_timing[index], actual_timing[index]) 184 EXPECT_EQ(expected_timing[index], actual_timing[index])
194 << "turn #" << index << " not matching"; 185 << "turn #" << index << " not matching";
195 } 186 }
196 } 187 }
197 188
198 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { 189 TEST(ConversationalSpeechTest, MultiEndCallCreate) {
199 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 190 auto mock_wavreader_factory = CreateMockWavReaderFactory();
200 191
201 // There are 5 unique audio tracks to read. 192 // There are 5 unique audio tracks to read.
202 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5); 193 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5);
203 194
204 // Inject the mock wav reader factory. 195 // Inject the mock wav reader factory.
205 conversational_speech::MultiEndCall multiend_call( 196 conversational_speech::MultiEndCall multiend_call(
206 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 197 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
207 EXPECT_TRUE(multiend_call.valid()); 198 EXPECT_TRUE(multiend_call.valid());
208 199
209 // Test. 200 // Test.
210 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 201 EXPECT_EQ(2u, multiend_call.speaker_names().size());
211 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 202 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
212 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); 203 EXPECT_EQ(6u, multiend_call.speaking_turns().size());
213 } 204 }
214 205
215 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { 206 TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
216 const std::vector<Turn> timing = { 207 const std::vector<Turn> timing = {
217 {"A", "sr8000", 0}, 208 {"A", "sr8000", 0},
218 {"B", "sr16000", 0}, 209 {"B", "sr16000", 0},
219 }; 210 };
220 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 211 auto mock_wavreader_factory = CreateMockWavReaderFactory();
221 212
222 // There are two unique audio tracks to read. 213 // There are two unique audio tracks to read.
223 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); 214 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
224 215
225 MultiEndCall multiend_call( 216 MultiEndCall multiend_call(
226 timing, audiotracks_path, std::move(mock_wavreader_factory)); 217 timing, audiotracks_path, std::move(mock_wavreader_factory));
227 EXPECT_FALSE(multiend_call.valid()); 218 EXPECT_FALSE(multiend_call.valid());
228 } 219 }
229 220
230 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { 221 TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
231 const std::vector<Turn> timing = { 222 const std::vector<Turn> timing = {
232 {"A", "sr16000_stereo", 0}, 223 {"A", "sr16000_stereo", 0},
233 {"B", "sr16000_stereo", 0}, 224 {"B", "sr16000_stereo", 0},
234 }; 225 };
235 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 226 auto mock_wavreader_factory = CreateMockWavReaderFactory();
236 227
237 // There is one unique audio track to read. 228 // There is one unique audio track to read.
238 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); 229 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
239 230
240 MultiEndCall multiend_call( 231 MultiEndCall multiend_call(
241 timing, audiotracks_path, std::move(mock_wavreader_factory)); 232 timing, audiotracks_path, std::move(mock_wavreader_factory));
242 EXPECT_FALSE(multiend_call.valid()); 233 EXPECT_FALSE(multiend_call.valid());
243 } 234 }
244 235
245 TEST_F(ConversationalSpeechTest, 236 TEST(ConversationalSpeechTest,
246 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { 237 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
247 const std::vector<Turn> timing = { 238 const std::vector<Turn> timing = {
248 {"A", "sr8000", 0}, 239 {"A", "sr8000", 0},
249 {"B", "sr16000_stereo", 0}, 240 {"B", "sr16000_stereo", 0},
250 }; 241 };
251 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 242 auto mock_wavreader_factory = CreateMockWavReaderFactory();
252 243
253 // There are two unique audio tracks to read. 244 // There are two unique audio tracks to read.
254 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); 245 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
255 246
256 MultiEndCall multiend_call( 247 MultiEndCall multiend_call(
257 timing, audiotracks_path, std::move(mock_wavreader_factory)); 248 timing, audiotracks_path, std::move(mock_wavreader_factory));
258 EXPECT_FALSE(multiend_call.valid()); 249 EXPECT_FALSE(multiend_call.valid());
259 } 250 }
260 251
261 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { 252 TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
262 const std::vector<Turn> timing = { 253 const std::vector<Turn> timing = {
263 {"A", "t500", -100}, 254 {"A", "t500", -100},
264 {"B", "t500", 0}, 255 {"B", "t500", 0},
265 }; 256 };
266 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 257 auto mock_wavreader_factory = CreateMockWavReaderFactory();
267 258
268 // There is one unique audio track to read. 259 // There is one unique audio track to read.
269 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 260 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
270 261
271 conversational_speech::MultiEndCall multiend_call( 262 conversational_speech::MultiEndCall multiend_call(
272 timing, audiotracks_path, std::move(mock_wavreader_factory)); 263 timing, audiotracks_path, std::move(mock_wavreader_factory));
273 EXPECT_FALSE(multiend_call.valid()); 264 EXPECT_FALSE(multiend_call.valid());
274 } 265 }
275 266
276 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { 267 TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) {
277 // Accept: 268 // Accept:
278 // A 0****..... 269 // A 0****.....
279 // B .....1**** 270 // B .....1****
280 constexpr std::size_t expected_duration = kDefaultSampleRate; 271 constexpr std::size_t expected_duration = kDefaultSampleRate;
281 const std::vector<Turn> timing = { 272 const std::vector<Turn> timing = {
282 {"A", "t500", 0}, 273 {"A", "t500", 0},
283 {"B", "t500", 0}, 274 {"B", "t500", 0},
284 }; 275 };
285 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 276 auto mock_wavreader_factory = CreateMockWavReaderFactory();
286 277
287 // There is one unique audio track to read. 278 // There is one unique audio track to read.
288 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 279 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
289 280
290 conversational_speech::MultiEndCall multiend_call( 281 conversational_speech::MultiEndCall multiend_call(
291 timing, audiotracks_path, std::move(mock_wavreader_factory)); 282 timing, audiotracks_path, std::move(mock_wavreader_factory));
292 EXPECT_TRUE(multiend_call.valid()); 283 EXPECT_TRUE(multiend_call.valid());
293 284
294 // Test. 285 // Test.
295 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 286 EXPECT_EQ(2u, multiend_call.speaker_names().size());
296 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 287 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
297 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 288 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
298 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 289 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
299 } 290 }
300 291
301 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { 292 TEST(ConversationalSpeechTest, MultiEndCallSetupPause) {
302 // Accept: 293 // Accept:
303 // A 0****....... 294 // A 0****.......
304 // B .......1**** 295 // B .......1****
305 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 296 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
306 const std::vector<Turn> timing = { 297 const std::vector<Turn> timing = {
307 {"A", "t500", 0}, 298 {"A", "t500", 0},
308 {"B", "t500", 200}, 299 {"B", "t500", 200},
309 }; 300 };
310 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 301 auto mock_wavreader_factory = CreateMockWavReaderFactory();
311 302
312 // There is one unique audio track to read. 303 // There is one unique audio track to read.
313 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 304 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
314 305
315 conversational_speech::MultiEndCall multiend_call( 306 conversational_speech::MultiEndCall multiend_call(
316 timing, audiotracks_path, std::move(mock_wavreader_factory)); 307 timing, audiotracks_path, std::move(mock_wavreader_factory));
317 EXPECT_TRUE(multiend_call.valid()); 308 EXPECT_TRUE(multiend_call.valid());
318 309
319 // Test. 310 // Test.
320 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 311 EXPECT_EQ(2u, multiend_call.speaker_names().size());
321 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 312 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
322 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 313 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
323 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 314 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
324 } 315 }
325 316
326 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { 317 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) {
327 // Accept: 318 // Accept:
328 // A 0****.... 319 // A 0****....
329 // B ....1**** 320 // B ....1****
330 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; 321 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9;
331 const std::vector<Turn> timing = { 322 const std::vector<Turn> timing = {
332 {"A", "t500", 0}, 323 {"A", "t500", 0},
333 {"B", "t500", -100}, 324 {"B", "t500", -100},
334 }; 325 };
335 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 326 auto mock_wavreader_factory = CreateMockWavReaderFactory();
336 327
337 // There is one unique audio track to read. 328 // There is one unique audio track to read.
338 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 329 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
339 330
340 conversational_speech::MultiEndCall multiend_call( 331 conversational_speech::MultiEndCall multiend_call(
341 timing, audiotracks_path, std::move(mock_wavreader_factory)); 332 timing, audiotracks_path, std::move(mock_wavreader_factory));
342 EXPECT_TRUE(multiend_call.valid()); 333 EXPECT_TRUE(multiend_call.valid());
343 334
344 // Test. 335 // Test.
345 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 336 EXPECT_EQ(2u, multiend_call.speaker_names().size());
346 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 337 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
347 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 338 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
348 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 339 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
349 } 340 }
350 341
351 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { 342 TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) {
352 // Reject: 343 // Reject:
353 // A ..0**** 344 // A ..0****
354 // B .1****. The n-th turn cannot start before the (n-1)-th one. 345 // B .1****. The n-th turn cannot start before the (n-1)-th one.
355 const std::vector<Turn> timing = { 346 const std::vector<Turn> timing = {
356 {"A", "t500", 200}, 347 {"A", "t500", 200},
357 {"B", "t500", -600}, 348 {"B", "t500", -600},
358 }; 349 };
359 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 350 auto mock_wavreader_factory = CreateMockWavReaderFactory();
360 351
361 // There is one unique audio track to read. 352 // There is one unique audio track to read.
362 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 353 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
363 354
364 conversational_speech::MultiEndCall multiend_call( 355 conversational_speech::MultiEndCall multiend_call(
365 timing, audiotracks_path, std::move(mock_wavreader_factory)); 356 timing, audiotracks_path, std::move(mock_wavreader_factory));
366 EXPECT_FALSE(multiend_call.valid()); 357 EXPECT_FALSE(multiend_call.valid());
367 } 358 }
368 359
369 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { 360 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) {
370 // Accept: 361 // Accept:
371 // A 0****2****... 362 // A 0****2****...
372 // B ...1********* 363 // B ...1*********
373 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; 364 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3;
374 const std::vector<Turn> timing = { 365 const std::vector<Turn> timing = {
375 {"A", "t500", 0}, 366 {"A", "t500", 0},
376 {"B", "t1000", -200}, 367 {"B", "t1000", -200},
377 {"A", "t500", -800}, 368 {"A", "t500", -800},
378 }; 369 };
379 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 370 auto mock_wavreader_factory = CreateMockWavReaderFactory();
380 371
381 // There are two unique audio tracks to read. 372 // There are two unique audio tracks to read.
382 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 373 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
383 374
384 conversational_speech::MultiEndCall multiend_call( 375 conversational_speech::MultiEndCall multiend_call(
385 timing, audiotracks_path, std::move(mock_wavreader_factory)); 376 timing, audiotracks_path, std::move(mock_wavreader_factory));
386 EXPECT_TRUE(multiend_call.valid()); 377 EXPECT_TRUE(multiend_call.valid());
387 378
388 // Test. 379 // Test.
389 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 380 EXPECT_EQ(2u, multiend_call.speaker_names().size());
390 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 381 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
391 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 382 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
392 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 383 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
393 } 384 }
394 385
395 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { 386 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
396 // Reject: 387 // Reject:
397 // A 0****...... 388 // A 0****......
398 // A ...1****... 389 // A ...1****...
399 // B ......2**** 390 // B ......2****
400 // ^ Turn #1 overlaps with #0 which is from the same speaker. 391 // ^ Turn #1 overlaps with #0 which is from the same speaker.
401 const std::vector<Turn> timing = { 392 const std::vector<Turn> timing = {
402 {"A", "t500", 0}, 393 {"A", "t500", 0},
403 {"A", "t500", -200}, 394 {"A", "t500", -200},
404 {"B", "t500", -200}, 395 {"B", "t500", -200},
405 }; 396 };
406 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 397 auto mock_wavreader_factory = CreateMockWavReaderFactory();
407 398
408 // There is one unique audio track to read. 399 // There is one unique audio track to read.
409 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 400 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
410 401
411 conversational_speech::MultiEndCall multiend_call( 402 conversational_speech::MultiEndCall multiend_call(
412 timing, audiotracks_path, std::move(mock_wavreader_factory)); 403 timing, audiotracks_path, std::move(mock_wavreader_factory));
413 EXPECT_FALSE(multiend_call.valid()); 404 EXPECT_FALSE(multiend_call.valid());
414 } 405 }
415 406
416 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { 407 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
417 // Reject: 408 // Reject:
418 // A 0********* 409 // A 0*********
419 // B 1**....... 410 // B 1**.......
420 // C ...2**.... 411 // C ...2**....
421 // A ......3**. 412 // A ......3**.
422 // ^ Turn #3 overlaps with #0 which is from the same speaker. 413 // ^ Turn #3 overlaps with #0 which is from the same speaker.
423 const std::vector<Turn> timing = { 414 const std::vector<Turn> timing = {
424 {"A", "t1000", 0}, 415 {"A", "t1000", 0},
425 {"B", "t300", -1000}, 416 {"B", "t300", -1000},
426 {"C", "t300", 0}, 417 {"C", "t300", 0},
427 {"A", "t300", 0}, 418 {"A", "t300", 0},
428 }; 419 };
429 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 420 auto mock_wavreader_factory = CreateMockWavReaderFactory();
430 421
431 // There are two unique audio tracks to read. 422 // There are two unique audio tracks to read.
432 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 423 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
433 424
434 conversational_speech::MultiEndCall multiend_call( 425 conversational_speech::MultiEndCall multiend_call(
435 timing, audiotracks_path, std::move(mock_wavreader_factory)); 426 timing, audiotracks_path, std::move(mock_wavreader_factory));
436 EXPECT_FALSE(multiend_call.valid()); 427 EXPECT_FALSE(multiend_call.valid());
437 } 428 }
438 429
439 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { 430 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
440 // Accept: 431 // Accept:
441 // A 0*********.. 432 // A 0*********..
442 // B ..1****..... 433 // B ..1****.....
443 // C .......2**** 434 // C .......2****
444 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 435 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
445 const std::vector<Turn> timing = { 436 const std::vector<Turn> timing = {
446 {"A", "t1000", 0}, 437 {"A", "t1000", 0},
447 {"B", "t500", -800}, 438 {"B", "t500", -800},
448 {"C", "t500", 0}, 439 {"C", "t500", 0},
449 }; 440 };
450 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 441 auto mock_wavreader_factory = CreateMockWavReaderFactory();
451 442
452 // There are two unique audio tracks to read. 443 // There are two unique audio tracks to read.
453 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 444 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
454 445
455 conversational_speech::MultiEndCall multiend_call( 446 conversational_speech::MultiEndCall multiend_call(
456 timing, audiotracks_path, std::move(mock_wavreader_factory)); 447 timing, audiotracks_path, std::move(mock_wavreader_factory));
457 EXPECT_TRUE(multiend_call.valid()); 448 EXPECT_TRUE(multiend_call.valid());
458 449
459 // Test. 450 // Test.
460 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 451 EXPECT_EQ(3u, multiend_call.speaker_names().size());
461 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 452 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
462 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 453 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
463 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 454 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
464 } 455 }
465 456
466 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { 457 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
467 // Reject: 458 // Reject:
468 // A 0********* 459 // A 0*********
469 // B ..1****... 460 // B ..1****...
470 // C ....2****. 461 // C ....2****.
471 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers 462 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
472 // not permitted). 463 // not permitted).
473 const std::vector<Turn> timing = { 464 const std::vector<Turn> timing = {
474 {"A", "t1000", 0}, 465 {"A", "t1000", 0},
475 {"B", "t500", -800}, 466 {"B", "t500", -800},
476 {"C", "t500", -300}, 467 {"C", "t500", -300},
477 }; 468 };
478 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 469 auto mock_wavreader_factory = CreateMockWavReaderFactory();
479 470
480 // There are two unique audio tracks to read. 471 // There are two unique audio tracks to read.
481 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 472 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
482 473
483 conversational_speech::MultiEndCall multiend_call( 474 conversational_speech::MultiEndCall multiend_call(
484 timing, audiotracks_path, std::move(mock_wavreader_factory)); 475 timing, audiotracks_path, std::move(mock_wavreader_factory));
485 EXPECT_FALSE(multiend_call.valid()); 476 EXPECT_FALSE(multiend_call.valid());
486 } 477 }
487 478
488 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { 479 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
489 // Accept: 480 // Accept:
490 // A 0*********.. 481 // A 0*********..
490 // B .1****...... 481 // B .1****......
491 // C .......2**** 482 // C .......2****
493 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; 484 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
494 const std::vector<Turn> timing = { 485 const std::vector<Turn> timing = {
495 {"A", "t1000", 0}, 486 {"A", "t1000", 0},
496 {"B", "t500", -900}, 487 {"B", "t500", -900},
497 {"C", "t500", 100}, 488 {"C", "t500", 100},
498 }; 489 };
499 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 490 auto mock_wavreader_factory = CreateMockWavReaderFactory();
500 491
501 // There are two unique audio tracks to read. 492 // There are two unique audio tracks to read.
502 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 493 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
503 494
504 conversational_speech::MultiEndCall multiend_call( 495 conversational_speech::MultiEndCall multiend_call(
505 timing, audiotracks_path, std::move(mock_wavreader_factory)); 496 timing, audiotracks_path, std::move(mock_wavreader_factory));
506 EXPECT_TRUE(multiend_call.valid()); 497 EXPECT_TRUE(multiend_call.valid());
507 498
508 // Test. 499 // Test.
509 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 500 EXPECT_EQ(3u, multiend_call.speaker_names().size());
510 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 501 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
511 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); 502 EXPECT_EQ(3u, multiend_call.speaking_turns().size());
512 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 503 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
513 } 504 }
514 505
515 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { 506 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
516 // Accept: 507 // Accept:
517 // A 0**** 508 // A 0****
518 // B 1**** 509 // B 1****
519 const std::vector<Turn> timing = { 510 const std::vector<Turn> timing = {
520 {"A", "t500", 0}, 511 {"A", "t500", 0},
521 {"B", "t500", -500}, 512 {"B", "t500", -500},
522 }; 513 };
523 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 514 auto mock_wavreader_factory = CreateMockWavReaderFactory();
524 515
525 // There is one unique audio track to read. 516 // There is one unique audio track to read.
526 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 517 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
527 518
528 conversational_speech::MultiEndCall multiend_call( 519 conversational_speech::MultiEndCall multiend_call(
529 timing, audiotracks_path, std::move(mock_wavreader_factory)); 520 timing, audiotracks_path, std::move(mock_wavreader_factory));
530 EXPECT_TRUE(multiend_call.valid()); 521 EXPECT_TRUE(multiend_call.valid());
531 522
532 // Test. 523 // Test.
533 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 524 EXPECT_EQ(2u, multiend_call.speaker_names().size());
534 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); 525 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
535 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); 526 EXPECT_EQ(2u, multiend_call.speaking_turns().size());
536 } 527 }
537 528
538 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { 529 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
539 // Accept: 530 // Accept:
540 // A 0****....3****.5**. 531 // A 0****....3****.5**.
541 // B .....1****...4**... 532 // B .....1****...4**...
542 // C ......2**.......6**.. 533 // C ......2**.......6**..
543 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9; 534 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
544 const std::vector<Turn> timing = { 535 const std::vector<Turn> timing = {
545 {"A", "t500", 0}, 536 {"A", "t500", 0},
546 {"B", "t500", 0}, 537 {"B", "t500", 0},
547 {"C", "t300", -400}, 538 {"C", "t300", -400},
548 {"A", "t500", 0}, 539 {"A", "t500", 0},
(...skipping 12 matching lines...) Expand all
561 timing, audiotracks_path, std::move(mock_wavreader_factory)); 552 timing, audiotracks_path, std::move(mock_wavreader_factory));
562 EXPECT_TRUE(multiend_call.valid()); 553 EXPECT_TRUE(multiend_call.valid());
563 554
564 // Test. 555 // Test.
565 EXPECT_EQ(3u, multiend_call.speaker_names().size()); 556 EXPECT_EQ(3u, multiend_call.speaker_names().size());
566 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); 557 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
567 EXPECT_EQ(7u, multiend_call.speaking_turns().size()); 558 EXPECT_EQ(7u, multiend_call.speaking_turns().size());
568 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); 559 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
569 } 560 }
570 561
571 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { 562 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
572 // Reject: 563 // Reject:
573 // A 0****....3****.6** 564 // A 0****....3****.6**
574 // B .....1****...4**.. 565 // B .....1****...4**..
575 // C ......2**.....5**.. 566 // C ......2**.....5**..
576 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ 567 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
577 // speakers not permitted). 568 // speakers not permitted).
578 const std::vector<Turn> timing = { 569 const std::vector<Turn> timing = {
579 {"A", "t500", 0}, 570 {"A", "t500", 0},
580 {"B", "t500", 0}, 571 {"B", "t500", 0},
581 {"C", "t300", -400}, 572 {"C", "t300", -400},
582 {"A", "t500", 0}, 573 {"A", "t500", 0},
583 {"B", "t300", -100}, 574 {"B", "t300", -100},
584 {"A", "t300", -200}, 575 {"A", "t300", -200},
585 {"C", "t300", -200}, 576 {"C", "t300", -200},
586 }; 577 };
587 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( 578 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
588 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, 579 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
589 kDefaultMockWavReaderFactoryParamsMap)); 580 kDefaultMockWavReaderFactoryParamsMap));
590 581
591 // There are two unique audio tracks to read. 582 // There are two unique audio tracks to read.
592 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); 583 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
593 584
594 conversational_speech::MultiEndCall multiend_call( 585 conversational_speech::MultiEndCall multiend_call(
595 timing, audiotracks_path, std::move(mock_wavreader_factory)); 586 timing, audiotracks_path, std::move(mock_wavreader_factory));
596 EXPECT_FALSE(multiend_call.valid()); 587 EXPECT_FALSE(multiend_call.valid());
597 } 588 }
598 589
599 TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { 590 TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
600 // Parameters with which wav files are created. 591 // Parameters with which wav files are created.
601 constexpr int duration_seconds = 5; 592 constexpr int duration_seconds = 5;
602 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; 593 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
603 594
604 for (int sample_rate : sample_rates) { 595 for (int sample_rate : sample_rates) {
605 const rtc::Pathname temp_filename( 596 const rtc::Pathname temp_filename(
606 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) 597 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)
607 + ".wav"); 598 + ".wav");
608 599
609 // Write wav file. 600 // Write wav file.
610 const std::size_t num_samples = duration_seconds * sample_rate; 601 const std::size_t num_samples = duration_seconds * sample_rate;
611 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; 602 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
612 CreateSineWavFile(temp_filename.pathname(), params); 603 CreateSineWavFile(temp_filename.pathname(), params);
613 604
614 // Load wav file and check if params match. 605 // Load wav file and check if params match.
615 WavReaderFactory wav_reader_factory; 606 WavReaderFactory wav_reader_factory;
616 MockWavReaderFactory::Params expeted_params = { 607 MockWavReaderFactory::Params expeted_params = {
617 sample_rate, 1u, num_samples}; 608 sample_rate, 1u, num_samples};
618 CheckAudioTrackParams( 609 CheckAudioTrackParams(
619 wav_reader_factory, temp_filename.pathname(), expeted_params); 610 wav_reader_factory, temp_filename.pathname(), expeted_params);
620 611
621 // Clean up. 612 // Clean up.
622 remove(temp_filename.pathname().c_str()); 613 remove(temp_filename.pathname().c_str());
623 } 614 }
624 } 615 }
625 616
626 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) { 617 TEST(ConversationalSpeechTest, MultiEndCallSimulator) {
627 // Simulated call (one character corresponding to 500 ms): 618 // Simulated call (one character corresponding to 500 ms):
628 // A 0*********...........2*********..... 619 // A 0*********...........2*********.....
629 // B ...........1*********.....3********* 620 // B ...........1*********.....3*********
630 const std::vector<Turn> expected_timing = { 621 const std::vector<Turn> expected_timing = {
631 {"A", "t5000_440.wav", 0}, 622 {"A", "t5000_440.wav", 0},
632 {"B", "t5000_880.wav", 500}, 623 {"B", "t5000_880.wav", 500},
633 {"A", "t5000_440.wav", 0}, 624 {"A", "t5000_440.wav", 0},
634 {"B", "t5000_880.wav", -2500}, 625 {"B", "t5000_880.wav", -2500},
635 }; 626 };
636 const std::size_t expected_duration_seconds = 18; 627 const std::size_t expected_duration_seconds = 18;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
673 } 664 }
674 665
675 // Clean. 666 // Clean.
676 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents( 667 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents(
677 rtc::Pathname(audiotracks_path))) 668 rtc::Pathname(audiotracks_path)))
678 << "Cannot delete temporary data directory " << audiotracks_path; 669 << "Cannot delete temporary data directory " << audiotracks_path;
679 } 670 }
680 671
681 } // namespace test 672 } // namespace test
682 } // namespace webrtc 673 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698