OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 // This file consists of unit tests for webrtc::test::conversational_speech | 11 // This file consists of unit tests for webrtc::test::conversational_speech |
12 // members. Part of them focus on accepting or rejecting different | 12 // members. Part of them focus on accepting or rejecting different |
13 // conversational speech setups. A setup is defined by a set of audio tracks and | 13 // conversational speech setups. A setup is defined by a set of audio tracks and |
14 // timing information. | 14 // timing information. |
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest, | 15 // The docstring at the beginning of each TEST(ConversationalSpeechTest, |
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which | 16 // MultiEndCallSetup*) function looks like the drawing below and indicates which |
17 // setup is tested. | 17 // setup is tested. |
18 // | 18 // |
19 // Accept: | 19 // Accept: |
20 // A 0****..... | 20 // A 0****..... |
21 // B .....1**** | 21 // B .....1**** |
22 // | 22 // |
23 // The drawing indicates the following: | 23 // The drawing indicates the following: |
24 // - the illustrated setup should be accepted, | 24 // - the illustrated setup should be accepted, |
25 // - there are two speakers (namely, A and B), | 25 // - there are two speakers (namely, A and B), |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 auto wav_reader = wav_reader_factory.Create(filepath); | 149 auto wav_reader = wav_reader_factory.Create(filepath); |
150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate()); | 150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate()); |
151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels()); | 151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels()); |
152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples()); | 152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples()); |
153 } | 153 } |
154 | 154 |
155 } // namespace | 155 } // namespace |
156 | 156 |
157 using testing::_; | 157 using testing::_; |
158 | 158 |
159 // TODO(alessiob): Remove fixture once conversational_speech fully implemented | 159 TEST(ConversationalSpeechTest, Settings) { |
160 // and replace TEST_F with TEST. | |
161 class ConversationalSpeechTest : public testing::Test { | |
162 public: | |
163 ConversationalSpeechTest() { | |
164 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); | |
165 } | |
166 }; | |
167 | |
168 TEST_F(ConversationalSpeechTest, Settings) { | |
169 const conversational_speech::Config config( | 160 const conversational_speech::Config config( |
170 audiotracks_path, timing_filepath, output_path); | 161 audiotracks_path, timing_filepath, output_path); |
171 | 162 |
172 // Test getters. | 163 // Test getters. |
173 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); | 164 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); |
174 EXPECT_EQ(timing_filepath, config.timing_filepath()); | 165 EXPECT_EQ(timing_filepath, config.timing_filepath()); |
175 EXPECT_EQ(output_path, config.output_path()); | 166 EXPECT_EQ(output_path, config.output_path()); |
176 } | 167 } |
177 | 168 |
178 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { | 169 TEST(ConversationalSpeechTest, TimingSaveLoad) { |
179 // Save test timing. | 170 // Save test timing. |
180 const std::string temporary_filepath = webrtc::test::TempFilename( | 171 const std::string temporary_filepath = webrtc::test::TempFilename( |
181 webrtc::test::OutputPath(), "TempTimingTestFile"); | 172 webrtc::test::OutputPath(), "TempTimingTestFile"); |
182 SaveTiming(temporary_filepath, expected_timing); | 173 SaveTiming(temporary_filepath, expected_timing); |
183 | 174 |
184 // Create a std::vector<Turn> instance by loading from file. | 175 // Create a std::vector<Turn> instance by loading from file. |
185 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); | 176 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); |
186 std::remove(temporary_filepath.c_str()); | 177 std::remove(temporary_filepath.c_str()); |
187 | 178 |
188 // Check size. | 179 // Check size. |
189 EXPECT_EQ(expected_timing.size(), actual_timing.size()); | 180 EXPECT_EQ(expected_timing.size(), actual_timing.size()); |
190 | 181 |
191 // Check Turn instances. | 182 // Check Turn instances. |
192 for (size_t index = 0; index < expected_timing.size(); ++index) { | 183 for (size_t index = 0; index < expected_timing.size(); ++index) { |
193 EXPECT_EQ(expected_timing[index], actual_timing[index]) | 184 EXPECT_EQ(expected_timing[index], actual_timing[index]) |
194 << "turn #" << index << " not matching"; | 185 << "turn #" << index << " not matching"; |
195 } | 186 } |
196 } | 187 } |
197 | 188 |
198 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { | 189 TEST(ConversationalSpeechTest, MultiEndCallCreate) { |
199 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 190 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
200 | 191 |
201 // There are 5 unique audio tracks to read. | 192 // There are 5 unique audio tracks to read. |
202 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5); | 193 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5); |
203 | 194 |
204 // Inject the mock wav reader factory. | 195 // Inject the mock wav reader factory. |
205 conversational_speech::MultiEndCall multiend_call( | 196 conversational_speech::MultiEndCall multiend_call( |
206 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); | 197 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
207 EXPECT_TRUE(multiend_call.valid()); | 198 EXPECT_TRUE(multiend_call.valid()); |
208 | 199 |
209 // Test. | 200 // Test. |
210 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 201 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
211 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); | 202 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
212 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); | 203 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); |
213 } | 204 } |
214 | 205 |
215 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { | 206 TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { |
216 const std::vector<Turn> timing = { | 207 const std::vector<Turn> timing = { |
217 {"A", "sr8000", 0}, | 208 {"A", "sr8000", 0}, |
218 {"B", "sr16000", 0}, | 209 {"B", "sr16000", 0}, |
219 }; | 210 }; |
220 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 211 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
221 | 212 |
222 // There are two unique audio tracks to read. | 213 // There are two unique audio tracks to read. |
223 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | 214 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
224 | 215 |
225 MultiEndCall multiend_call( | 216 MultiEndCall multiend_call( |
226 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 217 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
227 EXPECT_FALSE(multiend_call.valid()); | 218 EXPECT_FALSE(multiend_call.valid()); |
228 } | 219 } |
229 | 220 |
230 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { | 221 TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { |
231 const std::vector<Turn> timing = { | 222 const std::vector<Turn> timing = { |
232 {"A", "sr16000_stereo", 0}, | 223 {"A", "sr16000_stereo", 0}, |
233 {"B", "sr16000_stereo", 0}, | 224 {"B", "sr16000_stereo", 0}, |
234 }; | 225 }; |
235 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 226 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
236 | 227 |
237 // There is one unique audio track to read. | 228 // There is one unique audio track to read. |
238 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | 229 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
239 | 230 |
240 MultiEndCall multiend_call( | 231 MultiEndCall multiend_call( |
241 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 232 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
242 EXPECT_FALSE(multiend_call.valid()); | 233 EXPECT_FALSE(multiend_call.valid()); |
243 } | 234 } |
244 | 235 |
245 TEST_F(ConversationalSpeechTest, | 236 TEST(ConversationalSpeechTest, |
246 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { | 237 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { |
247 const std::vector<Turn> timing = { | 238 const std::vector<Turn> timing = { |
248 {"A", "sr8000", 0}, | 239 {"A", "sr8000", 0}, |
249 {"B", "sr16000_stereo", 0}, | 240 {"B", "sr16000_stereo", 0}, |
250 }; | 241 }; |
251 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 242 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
252 | 243 |
253 // There are two unique audio tracks to read. | 244 // There are two unique audio tracks to read. |
254 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | 245 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
255 | 246 |
256 MultiEndCall multiend_call( | 247 MultiEndCall multiend_call( |
257 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 248 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
258 EXPECT_FALSE(multiend_call.valid()); | 249 EXPECT_FALSE(multiend_call.valid()); |
259 } | 250 } |
260 | 251 |
261 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { | 252 TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { |
262 const std::vector<Turn> timing = { | 253 const std::vector<Turn> timing = { |
263 {"A", "t500", -100}, | 254 {"A", "t500", -100}, |
264 {"B", "t500", 0}, | 255 {"B", "t500", 0}, |
265 }; | 256 }; |
266 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 257 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
267 | 258 |
268 // There is one unique audio track to read. | 259 // There is one unique audio track to read. |
269 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 260 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
270 | 261 |
271 conversational_speech::MultiEndCall multiend_call( | 262 conversational_speech::MultiEndCall multiend_call( |
272 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 263 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
273 EXPECT_FALSE(multiend_call.valid()); | 264 EXPECT_FALSE(multiend_call.valid()); |
274 } | 265 } |
275 | 266 |
276 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { | 267 TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) { |
277 // Accept: | 268 // Accept: |
278 // A 0****..... | 269 // A 0****..... |
279 // B .....1**** | 270 // B .....1**** |
280 constexpr std::size_t expected_duration = kDefaultSampleRate; | 271 constexpr std::size_t expected_duration = kDefaultSampleRate; |
281 const std::vector<Turn> timing = { | 272 const std::vector<Turn> timing = { |
282 {"A", "t500", 0}, | 273 {"A", "t500", 0}, |
283 {"B", "t500", 0}, | 274 {"B", "t500", 0}, |
284 }; | 275 }; |
285 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 276 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
286 | 277 |
287 // There is one unique audio track to read. | 278 // There is one unique audio track to read. |
288 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 279 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
289 | 280 |
290 conversational_speech::MultiEndCall multiend_call( | 281 conversational_speech::MultiEndCall multiend_call( |
291 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 282 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
292 EXPECT_TRUE(multiend_call.valid()); | 283 EXPECT_TRUE(multiend_call.valid()); |
293 | 284 |
294 // Test. | 285 // Test. |
295 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 286 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
296 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | 287 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); |
297 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | 288 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); |
298 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 289 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
299 } | 290 } |
300 | 291 |
301 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { | 292 TEST(ConversationalSpeechTest, MultiEndCallSetupPause) { |
302 // Accept: | 293 // Accept: |
303 // A 0****....... | 294 // A 0****....... |
304 // B .......1**** | 295 // B .......1**** |
305 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; | 296 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; |
306 const std::vector<Turn> timing = { | 297 const std::vector<Turn> timing = { |
307 {"A", "t500", 0}, | 298 {"A", "t500", 0}, |
308 {"B", "t500", 200}, | 299 {"B", "t500", 200}, |
309 }; | 300 }; |
310 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 301 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
311 | 302 |
312 // There is one unique audio track to read. | 303 // There is one unique audio track to read. |
313 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 304 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
314 | 305 |
315 conversational_speech::MultiEndCall multiend_call( | 306 conversational_speech::MultiEndCall multiend_call( |
316 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 307 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
317 EXPECT_TRUE(multiend_call.valid()); | 308 EXPECT_TRUE(multiend_call.valid()); |
318 | 309 |
319 // Test. | 310 // Test. |
320 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 311 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
321 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | 312 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); |
322 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | 313 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); |
323 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 314 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
324 } | 315 } |
325 | 316 |
326 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { | 317 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { |
327 // Accept: | 318 // Accept: |
328 // A 0****.... | 319 // A 0****.... |
329 // B ....1**** | 320 // B ....1**** |
330 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; | 321 constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; |
331 const std::vector<Turn> timing = { | 322 const std::vector<Turn> timing = { |
332 {"A", "t500", 0}, | 323 {"A", "t500", 0}, |
333 {"B", "t500", -100}, | 324 {"B", "t500", -100}, |
334 }; | 325 }; |
335 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 326 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
336 | 327 |
337 // There is one unique audio track to read. | 328 // There is one unique audio track to read. |
338 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 329 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
339 | 330 |
340 conversational_speech::MultiEndCall multiend_call( | 331 conversational_speech::MultiEndCall multiend_call( |
341 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 332 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
342 EXPECT_TRUE(multiend_call.valid()); | 333 EXPECT_TRUE(multiend_call.valid()); |
343 | 334 |
344 // Test. | 335 // Test. |
345 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 336 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
346 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | 337 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); |
347 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | 338 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); |
348 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 339 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
349 } | 340 } |
350 | 341 |
351 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { | 342 TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { |
352 // Reject: | 343 // Reject: |
353 // A ..0**** | 344 // A ..0**** |
354 // B .1****. The n-th turn cannot start before the (n-1)-th one. | 345 // B .1****. The n-th turn cannot start before the (n-1)-th one. |
355 const std::vector<Turn> timing = { | 346 const std::vector<Turn> timing = { |
356 {"A", "t500", 200}, | 347 {"A", "t500", 200}, |
357 {"B", "t500", -600}, | 348 {"B", "t500", -600}, |
358 }; | 349 }; |
359 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 350 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
360 | 351 |
361 // There is one unique audio track to read. | 352 // There is one unique audio track to read. |
362 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 353 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
363 | 354 |
364 conversational_speech::MultiEndCall multiend_call( | 355 conversational_speech::MultiEndCall multiend_call( |
365 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 356 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
366 EXPECT_FALSE(multiend_call.valid()); | 357 EXPECT_FALSE(multiend_call.valid()); |
367 } | 358 } |
368 | 359 |
369 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { | 360 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { |
370 // Accept: | 361 // Accept: |
371 // A 0****2****... | 362 // A 0****2****... |
372 // B ...1********* | 363 // B ...1********* |
373 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; | 364 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; |
374 const std::vector<Turn> timing = { | 365 const std::vector<Turn> timing = { |
375 {"A", "t500", 0}, | 366 {"A", "t500", 0}, |
376 {"B", "t1000", -200}, | 367 {"B", "t1000", -200}, |
377 {"A", "t500", -800}, | 368 {"A", "t500", -800}, |
378 }; | 369 }; |
379 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 370 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
380 | 371 |
381 // There are two unique audio tracks to read. | 372 // There are two unique audio tracks to read. |
382 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 373 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
383 | 374 |
384 conversational_speech::MultiEndCall multiend_call( | 375 conversational_speech::MultiEndCall multiend_call( |
385 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 376 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
386 EXPECT_TRUE(multiend_call.valid()); | 377 EXPECT_TRUE(multiend_call.valid()); |
387 | 378 |
388 // Test. | 379 // Test. |
389 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 380 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
390 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | 381 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); |
391 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | 382 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); |
392 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 383 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
393 } | 384 } |
394 | 385 |
395 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { | 386 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { |
396 // Reject: | 387 // Reject: |
397 // A 0****...... | 388 // A 0****...... |
398 // A ...1****... | 389 // A ...1****... |
399 // B ......2**** | 390 // B ......2**** |
400 // ^ Turn #1 overlaps with #0 which is from the same speaker. | 391 // ^ Turn #1 overlaps with #0 which is from the same speaker. |
401 const std::vector<Turn> timing = { | 392 const std::vector<Turn> timing = { |
402 {"A", "t500", 0}, | 393 {"A", "t500", 0}, |
403 {"A", "t500", -200}, | 394 {"A", "t500", -200}, |
404 {"B", "t500", -200}, | 395 {"B", "t500", -200}, |
405 }; | 396 }; |
406 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 397 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
407 | 398 |
408 // There is one unique audio track to read. | 399 // There is one unique audio track to read. |
409 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 400 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
410 | 401 |
411 conversational_speech::MultiEndCall multiend_call( | 402 conversational_speech::MultiEndCall multiend_call( |
412 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 403 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
413 EXPECT_FALSE(multiend_call.valid()); | 404 EXPECT_FALSE(multiend_call.valid()); |
414 } | 405 } |
415 | 406 |
416 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { | 407 TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { |
417 // Reject: | 408 // Reject: |
418 // A 0********* | 409 // A 0********* |
419 // B 1**....... | 410 // B 1**....... |
420 // C ...2**.... | 411 // C ...2**.... |
421 // A ......3**. | 412 // A ......3**. |
422 // ^ Turn #3 overlaps with #0 which is from the same speaker. | 413 // ^ Turn #3 overlaps with #0 which is from the same speaker. |
423 const std::vector<Turn> timing = { | 414 const std::vector<Turn> timing = { |
424 {"A", "t1000", 0}, | 415 {"A", "t1000", 0}, |
425 {"B", "t300", -1000}, | 416 {"B", "t300", -1000}, |
426 {"C", "t300", 0}, | 417 {"C", "t300", 0}, |
427 {"A", "t300", 0}, | 418 {"A", "t300", 0}, |
428 }; | 419 }; |
429 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 420 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
430 | 421 |
431 // There are two unique audio tracks to read. | 422 // There are two unique audio tracks to read. |
432 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 423 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
433 | 424 |
434 conversational_speech::MultiEndCall multiend_call( | 425 conversational_speech::MultiEndCall multiend_call( |
435 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 426 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
436 EXPECT_FALSE(multiend_call.valid()); | 427 EXPECT_FALSE(multiend_call.valid()); |
437 } | 428 } |
438 | 429 |
439 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { | 430 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { |
440 // Accept: | 431 // Accept: |
441 // A 0*********.. | 432 // A 0*********.. |
442 // B ..1****..... | 433 // B ..1****..... |
443 // C .......2**** | 434 // C .......2**** |
444 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; | 435 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; |
445 const std::vector<Turn> timing = { | 436 const std::vector<Turn> timing = { |
446 {"A", "t1000", 0}, | 437 {"A", "t1000", 0}, |
447 {"B", "t500", -800}, | 438 {"B", "t500", -800}, |
448 {"C", "t500", 0}, | 439 {"C", "t500", 0}, |
449 }; | 440 }; |
450 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 441 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
451 | 442 |
452 // There are two unique audio tracks to read. | 443 // There are two unique audio tracks to read. |
453 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 444 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
454 | 445 |
455 conversational_speech::MultiEndCall multiend_call( | 446 conversational_speech::MultiEndCall multiend_call( |
456 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 447 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
457 EXPECT_TRUE(multiend_call.valid()); | 448 EXPECT_TRUE(multiend_call.valid()); |
458 | 449 |
459 // Test. | 450 // Test. |
460 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | 451 EXPECT_EQ(3u, multiend_call.speaker_names().size()); |
461 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | 452 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); |
462 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | 453 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); |
463 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 454 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
464 } | 455 } |
465 | 456 |
466 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { | 457 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { |
467 // Reject: | 458 // Reject: |
468 // A 0********* | 459 // A 0********* |
469 // B ..1****... | 460 // B ..1****... |
470 // C ....2****. | 461 // C ....2****. |
471 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers | 462 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers |
472 // not permitted). | 463 // not permitted). |
473 const std::vector<Turn> timing = { | 464 const std::vector<Turn> timing = { |
474 {"A", "t1000", 0}, | 465 {"A", "t1000", 0}, |
475 {"B", "t500", -800}, | 466 {"B", "t500", -800}, |
476 {"C", "t500", -300}, | 467 {"C", "t500", -300}, |
477 }; | 468 }; |
478 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 469 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
479 | 470 |
480 // There are two unique audio tracks to read. | 471 // There are two unique audio tracks to read. |
481 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 472 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
482 | 473 |
483 conversational_speech::MultiEndCall multiend_call( | 474 conversational_speech::MultiEndCall multiend_call( |
484 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 475 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
485 EXPECT_FALSE(multiend_call.valid()); | 476 EXPECT_FALSE(multiend_call.valid()); |
486 } | 477 } |
487 | 478 |
488 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { | 479 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { |
489 // Accept: | 480 // Accept: |
490 // A 0*********.. | 481 // A 0*********.. |
491 // B .1****...... | 482 // B .1****...... |
492 // C .......2**** | 483 // C .......2**** |
493 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; | 484 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; |
494 const std::vector<Turn> timing = { | 485 const std::vector<Turn> timing = { |
495 {"A", "t1000", 0}, | 486 {"A", "t1000", 0}, |
496 {"B", "t500", -900}, | 487 {"B", "t500", -900}, |
497 {"C", "t500", 100}, | 488 {"C", "t500", 100}, |
498 }; | 489 }; |
499 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 490 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
500 | 491 |
501 // There are two unique audio tracks to read. | 492 // There are two unique audio tracks to read. |
502 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 493 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
503 | 494 |
504 conversational_speech::MultiEndCall multiend_call( | 495 conversational_speech::MultiEndCall multiend_call( |
505 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 496 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
506 EXPECT_TRUE(multiend_call.valid()); | 497 EXPECT_TRUE(multiend_call.valid()); |
507 | 498 |
508 // Test. | 499 // Test. |
509 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | 500 EXPECT_EQ(3u, multiend_call.speaker_names().size()); |
510 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | 501 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); |
511 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | 502 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); |
512 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 503 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
513 } | 504 } |
514 | 505 |
515 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { | 506 TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { |
516 // Accept: | 507 // Accept: |
517 // A 0**** | 508 // A 0**** |
518 // B 1**** | 509 // B 1**** |
519 const std::vector<Turn> timing = { | 510 const std::vector<Turn> timing = { |
520 {"A", "t500", 0}, | 511 {"A", "t500", 0}, |
521 {"B", "t500", -500}, | 512 {"B", "t500", -500}, |
522 }; | 513 }; |
523 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 514 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
524 | 515 |
525 // There is one unique audio track to read. | 516 // There is one unique audio track to read. |
526 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 517 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
527 | 518 |
528 conversational_speech::MultiEndCall multiend_call( | 519 conversational_speech::MultiEndCall multiend_call( |
529 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 520 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
530 EXPECT_TRUE(multiend_call.valid()); | 521 EXPECT_TRUE(multiend_call.valid()); |
531 | 522 |
532 // Test. | 523 // Test. |
533 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 524 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
534 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | 525 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); |
535 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | 526 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); |
536 } | 527 } |
537 | 528 |
538 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { | 529 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { |
539 // Accept: | 530 // Accept: |
540 // A 0****....3****.5**. | 531 // A 0****....3****.5**. |
541 // B .....1****...4**... | 532 // B .....1****...4**... |
542 // C ......2**.......6**.. | 533 // C ......2**.......6**.. |
543 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9; | 534 constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9; |
544 const std::vector<Turn> timing = { | 535 const std::vector<Turn> timing = { |
545 {"A", "t500", 0}, | 536 {"A", "t500", 0}, |
546 {"B", "t500", 0}, | 537 {"B", "t500", 0}, |
547 {"C", "t300", -400}, | 538 {"C", "t300", -400}, |
548 {"A", "t500", 0}, | 539 {"A", "t500", 0}, |
(...skipping 12 matching lines...) Expand all Loading... |
561 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 552 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
562 EXPECT_TRUE(multiend_call.valid()); | 553 EXPECT_TRUE(multiend_call.valid()); |
563 | 554 |
564 // Test. | 555 // Test. |
565 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | 556 EXPECT_EQ(3u, multiend_call.speaker_names().size()); |
566 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | 557 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); |
567 EXPECT_EQ(7u, multiend_call.speaking_turns().size()); | 558 EXPECT_EQ(7u, multiend_call.speaking_turns().size()); |
568 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | 559 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); |
569 } | 560 } |
570 | 561 |
571 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { | 562 TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { |
572 // Reject: | 563 // Reject: |
573 // A 0****....3****.6** | 564 // A 0****....3****.6** |
574 // B .....1****...4**.. | 565 // B .....1****...4**.. |
575 // C ......2**.....5**.. | 566 // C ......2**.....5**.. |
576 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ | 567 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ |
577 // speakers not permitted). | 568 // speakers not permitted). |
578 const std::vector<Turn> timing = { | 569 const std::vector<Turn> timing = { |
579 {"A", "t500", 0}, | 570 {"A", "t500", 0}, |
580 {"B", "t500", 0}, | 571 {"B", "t500", 0}, |
581 {"C", "t300", -400}, | 572 {"C", "t300", -400}, |
582 {"A", "t500", 0}, | 573 {"A", "t500", 0}, |
583 {"B", "t300", -100}, | 574 {"B", "t300", -100}, |
584 {"A", "t300", -200}, | 575 {"A", "t300", -200}, |
585 {"C", "t300", -200}, | 576 {"C", "t300", -200}, |
586 }; | 577 }; |
587 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | 578 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( |
588 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | 579 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, |
589 kDefaultMockWavReaderFactoryParamsMap)); | 580 kDefaultMockWavReaderFactoryParamsMap)); |
590 | 581 |
591 // There are two unique audio tracks to read. | 582 // There are two unique audio tracks to read. |
592 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); | 583 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); |
593 | 584 |
594 conversational_speech::MultiEndCall multiend_call( | 585 conversational_speech::MultiEndCall multiend_call( |
595 timing, audiotracks_path, std::move(mock_wavreader_factory)); | 586 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
596 EXPECT_FALSE(multiend_call.valid()); | 587 EXPECT_FALSE(multiend_call.valid()); |
597 } | 588 } |
598 | 589 |
599 TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { | 590 TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { |
600 // Parameters with which wav files are created. | 591 // Parameters with which wav files are created. |
601 constexpr int duration_seconds = 5; | 592 constexpr int duration_seconds = 5; |
602 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; | 593 const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; |
603 | 594 |
604 for (int sample_rate : sample_rates) { | 595 for (int sample_rate : sample_rates) { |
605 const rtc::Pathname temp_filename( | 596 const rtc::Pathname temp_filename( |
606 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) | 597 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) |
607 + ".wav"); | 598 + ".wav"); |
608 | 599 |
609 // Write wav file. | 600 // Write wav file. |
610 const std::size_t num_samples = duration_seconds * sample_rate; | 601 const std::size_t num_samples = duration_seconds * sample_rate; |
611 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; | 602 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; |
612 CreateSineWavFile(temp_filename.pathname(), params); | 603 CreateSineWavFile(temp_filename.pathname(), params); |
613 | 604 |
614 // Load wav file and check if params match. | 605 // Load wav file and check if params match. |
615 WavReaderFactory wav_reader_factory; | 606 WavReaderFactory wav_reader_factory; |
616 MockWavReaderFactory::Params expeted_params = { | 607 MockWavReaderFactory::Params expeted_params = { |
617 sample_rate, 1u, num_samples}; | 608 sample_rate, 1u, num_samples}; |
618 CheckAudioTrackParams( | 609 CheckAudioTrackParams( |
619 wav_reader_factory, temp_filename.pathname(), expeted_params); | 610 wav_reader_factory, temp_filename.pathname(), expeted_params); |
620 | 611 |
621 // Clean up. | 612 // Clean up. |
622 remove(temp_filename.pathname().c_str()); | 613 remove(temp_filename.pathname().c_str()); |
623 } | 614 } |
624 } | 615 } |
625 | 616 |
626 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) { | 617 TEST(ConversationalSpeechTest, MultiEndCallSimulator) { |
627 // Simulated call (one character corresponding to 500 ms): | 618 // Simulated call (one character corresponding to 500 ms): |
628 // A 0*********...........2*********..... | 619 // A 0*********...........2*********..... |
629 // B ...........1*********.....3********* | 620 // B ...........1*********.....3********* |
630 const std::vector<Turn> expected_timing = { | 621 const std::vector<Turn> expected_timing = { |
631 {"A", "t5000_440.wav", 0}, | 622 {"A", "t5000_440.wav", 0}, |
632 {"B", "t5000_880.wav", 500}, | 623 {"B", "t5000_880.wav", 500}, |
633 {"A", "t5000_440.wav", 0}, | 624 {"A", "t5000_440.wav", 0}, |
634 {"B", "t5000_880.wav", -2500}, | 625 {"B", "t5000_880.wav", -2500}, |
635 }; | 626 }; |
636 const std::size_t expected_duration_seconds = 18; | 627 const std::size_t expected_duration_seconds = 18; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
673 } | 664 } |
674 | 665 |
675 // Clean. | 666 // Clean. |
676 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents( | 667 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents( |
677 rtc::Pathname(audiotracks_path))) | 668 rtc::Pathname(audiotracks_path))) |
678 << "Cannot delete temporary data directory " << audiotracks_path; | 669 << "Cannot delete temporary data directory " << audiotracks_path; |
679 } | 670 } |
680 | 671 |
681 } // namespace test | 672 } // namespace test |
682 } // namespace webrtc | 673 } // namespace webrtc |
OLD | NEW |