OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 // This file consists of unit tests for webrtc::test::conversational_speech | |
12 // members. Part of the tests focus on accepting or rejecting different | |
13 // conversational speech setups. A setup is defined by a set of audio tracks and | |
14 // timing information. | |
15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest, | |
16 // MultiEndCallSetup*) function looks like the drawing below and indicates which | |
17 // setup is tested. | |
18 // | |
19 // Accept: | |
20 // A 0****..... | |
21 // B .....1**** | |
22 // | |
23 // The drawing indicates the following: | |
24 // - the illustrated setup should be accepted, | |
25 // - there are two speakers (namely, A and B), | |
26 // - A speaks first, B speaks second, | |
27 // - each character after the speaker's letter indicates a time unit (e.g., 100 | |
28 // ms), | |
29 // - "*" indicates speaking, "." listening, | |
30 // - numbers indicate the turn index in std::vector<Turn>. | |
31 // | |
32 // Note that the same speaker can appear in multiple lines in order to depict | |
33 // cases in which there are wrong offsets leading to self cross-talk (which is | |
34 // rejected). | |
35 | |
11 #include <stdio.h> | 36 #include <stdio.h> |
37 #include <map> | |
12 #include <memory> | 38 #include <memory> |
13 | 39 |
40 #include "webrtc/base/logging.h" | |
14 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" | 41 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
15 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" | 42 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" |
16 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" | 43 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" |
17 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" | 44 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" |
18 #include "webrtc/test/gmock.h" | 45 #include "webrtc/test/gmock.h" |
19 #include "webrtc/test/gtest.h" | 46 #include "webrtc/test/gtest.h" |
20 #include "webrtc/test/testsupport/fileutils.h" | 47 #include "webrtc/test/testsupport/fileutils.h" |
21 | 48 |
22 namespace webrtc { | 49 namespace webrtc { |
23 namespace test { | 50 namespace test { |
(...skipping 13 matching lines...) | |
37 const std::vector<Turn> expected_timing = { | 64 const std::vector<Turn> expected_timing = { |
38 {"A", "a1", 0}, | 65 {"A", "a1", 0}, |
39 {"B", "b1", 0}, | 66 {"B", "b1", 0}, |
40 {"A", "a2", 100}, | 67 {"A", "a2", 100}, |
41 {"B", "b2", -200}, | 68 {"B", "b2", -200}, |
42 {"A", "a3", 0}, | 69 {"A", "a3", 0}, |
43 {"A", "a3", 0}, | 70 {"A", "a3", 0}, |
44 }; | 71 }; |
45 const std::size_t kNumberOfTurns = expected_timing.size(); | 72 const std::size_t kNumberOfTurns = expected_timing.size(); |
46 | 73 |
74 // Fake audio track parameters. | |
75 const MockWavReaderFactory::Params kMockWavReaderFactoryParams300ms = | |
76 {48000, 1u, 14400u}; // 48kHz sample rate, mono, 0.3 seconds. | |
77 const MockWavReaderFactory::Params kMockWavReaderFactoryParams500ms = | |
78 {48000, 1u, 24000u}; // 48kHz sample rate, mono, 0.5 seconds. | |
79 const MockWavReaderFactory::Params kMockWavReaderFactoryParams1000ms = | |
80 {48000, 1u, 48000u}; // 48kHz sample rate, mono, 1 second. | |
81 | |
82 // Default arguments for MockWavReaderFactory ctor. | |
83 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = | |
84 kMockWavReaderFactoryParams500ms; | |
85 const std::map<std::string, const MockWavReaderFactory::Params> | |
86 kDefaultMockWavReaderFactoryParamsMap = { | |
87 {"t300", kMockWavReaderFactoryParams300ms}, | |
88 {"t500", kMockWavReaderFactoryParams500ms}, | |
89 {"t1000", kMockWavReaderFactoryParams1000ms}, | |
[Review comment] AleBzk
2017/03/28 13:11:10
t300, t500 and t1000 will be used as fake audio tr... (comment truncated)
| |
90 }; | |
91 | |
92 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { | |
93 return std::unique_ptr<MockWavReaderFactory>( | |
94 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
95 kDefaultMockWavReaderFactoryParamsMap)); | |
96 } | |
97 | |
47 } // namespace | 98 } // namespace |
48 | 99 |
49 TEST(ConversationalSpeechTest, Settings) { | 100 class ConversationalSpeechTest : public testing::Test { |
101 public: | |
102 ConversationalSpeechTest() { | |
103 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); | |
104 } | |
105 }; | |
106 | |
107 TEST_F(ConversationalSpeechTest, Settings) { | |
50 const conversational_speech::Config config( | 108 const conversational_speech::Config config( |
51 audiotracks_path, timing_filepath, output_path); | 109 audiotracks_path, timing_filepath, output_path); |
52 | 110 |
53 // Test getters. | 111 // Test getters. |
54 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); | 112 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); |
55 EXPECT_EQ(timing_filepath, config.timing_filepath()); | 113 EXPECT_EQ(timing_filepath, config.timing_filepath()); |
56 EXPECT_EQ(output_path, config.output_path()); | 114 EXPECT_EQ(output_path, config.output_path()); |
57 } | 115 } |
58 | 116 |
59 TEST(ConversationalSpeechTest, TimingSaveLoad) { | 117 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { |
60 // Save test timing. | 118 // Save test timing. |
61 const std::string temporary_filepath = webrtc::test::TempFilename( | 119 const std::string temporary_filepath = webrtc::test::TempFilename( |
62 webrtc::test::OutputPath(), "TempTimingTestFile"); | 120 webrtc::test::OutputPath(), "TempTimingTestFile"); |
63 SaveTiming(temporary_filepath, expected_timing); | 121 SaveTiming(temporary_filepath, expected_timing); |
64 | 122 |
65 // Create a std::vector<Turn> instance by loading from file. | 123 // Create a std::vector<Turn> instance by loading from file. |
66 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); | 124 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); |
67 std::remove(temporary_filepath.c_str()); | 125 std::remove(temporary_filepath.c_str()); |
68 | 126 |
69 // Check size. | 127 // Check size. |
70 EXPECT_EQ(expected_timing.size(), actual_timing.size()); | 128 EXPECT_EQ(expected_timing.size(), actual_timing.size()); |
71 | 129 |
72 // Check Turn instances. | 130 // Check Turn instances. |
73 for (size_t index = 0; index < expected_timing.size(); ++index) { | 131 for (size_t index = 0; index < expected_timing.size(); ++index) { |
74 EXPECT_EQ(expected_timing[index], actual_timing[index]) | 132 EXPECT_EQ(expected_timing[index], actual_timing[index]) |
75 << "turn #" << index << " not matching"; | 133 << "turn #" << index << " not matching"; |
76 } | 134 } |
77 } | 135 } |
78 | 136 |
79 TEST(ConversationalSpeechTest, MultiEndCallCreate) { | 137 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { |
80 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | 138 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
81 new MockWavReaderFactory()); | |
82 | 139 |
83 // There are 5 unique audio tracks to read. | 140 // There are 5 unique audio tracks to read. |
84 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); | 141 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); |
85 | 142 |
86 // Inject the mock wav reader factory. | 143 // Inject the mock wav reader factory. |
87 conversational_speech::MultiEndCall multiend_call( | 144 conversational_speech::MultiEndCall multiend_call( |
88 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); | 145 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
146 EXPECT_TRUE(multiend_call.valid()); | |
89 | 147 |
90 // Test. | 148 // Test. |
91 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 149 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
92 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); | 150 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
93 } | 151 } |
94 | 152 |
153 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) { | |
154 const std::vector<Turn> timing = { | |
155 {"A", "t500", -100}, | |
156 {"B", "t500", 0}, | |
157 }; | |
158 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
159 | |
160 // There is one unique audio track to read. | |
161 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
162 | |
163 conversational_speech::MultiEndCall multiend_call( | |
164 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
165 EXPECT_FALSE(multiend_call.valid()); | |
166 } | |
167 | |
168 | |
169 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { | |
170 // Accept: | |
171 // A 0****..... | |
172 // B .....1**** | |
173 const std::vector<Turn> timing = { | |
174 {"A", "t500", 0}, | |
175 {"B", "t500", 0}, | |
176 }; | |
177 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
178 | |
179 // There is one unique audio track to read. | |
180 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
181 | |
182 conversational_speech::MultiEndCall multiend_call( | |
183 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
184 EXPECT_TRUE(multiend_call.valid()); | |
185 } | |
186 | |
187 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { | |
188 // Accept: | |
189 // A 0****....... | |
190 // B .......1**** | |
191 const std::vector<Turn> timing = { | |
192 {"A", "t500", 0}, | |
193 {"B", "t500", 200}, | |
194 }; | |
195 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
196 | |
197 // There is one unique audio track to read. | |
198 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
199 | |
200 conversational_speech::MultiEndCall multiend_call( | |
201 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
202 EXPECT_TRUE(multiend_call.valid()); | |
203 } | |
204 | |
205 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { | |
206 // Accept: | |
207 // A 0****.... | |
208 // B ....1**** | |
209 const std::vector<Turn> timing = { | |
210 {"A", "t500", 0}, | |
211 {"B", "t500", -100}, | |
212 }; | |
213 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
214 | |
215 // There is one unique audio track to read. | |
216 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
217 | |
218 conversational_speech::MultiEndCall multiend_call( | |
219 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
220 EXPECT_TRUE(multiend_call.valid()); | |
221 } | |
222 | |
223 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { | |
224 // Reject: | |
225 // A ..0**** | |
226 // B .1****. The n-th turn cannot start before the (n-1)-th one. | |
227 const std::vector<Turn> timing = { | |
228 {"A", "t500", 200}, | |
229 {"B", "t500", -600}, | |
230 }; | |
231 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
232 | |
233 // There is one unique audio track to read. | |
234 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
235 | |
236 conversational_speech::MultiEndCall multiend_call( | |
237 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
238 EXPECT_FALSE(multiend_call.valid()); | |
239 } | |
240 | |
241 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { | |
242 // Accept: | |
243 // A 0****2****... | |
244 // B ...1********* | |
245 const std::vector<Turn> timing = { | |
246 {"A", "t500", 0}, | |
247 {"B", "t1000", -200}, | |
248 {"A", "t500", -800}, | |
249 }; | |
250 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
251 | |
252 // There are two unique audio tracks to read. | |
253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
254 | |
255 conversational_speech::MultiEndCall multiend_call( | |
256 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
257 EXPECT_TRUE(multiend_call.valid()); | |
258 } | |
259 | |
260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { | |
261 // Reject: | |
262 // A 0****...... | |
263 // A ...1****... | |
264 // B ......2**** | |
265 // ^ Turn #1 overlaps with #0 which is from the same speaker. | |
266 const std::vector<Turn> timing = { | |
267 {"A", "t500", 0}, | |
268 {"A", "t500", -200}, | |
269 {"B", "t500", -200}, | |
270 }; | |
271 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
272 | |
273 // There is one unique audio track to read. | |
274 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
275 | |
276 conversational_speech::MultiEndCall multiend_call( | |
277 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
278 EXPECT_FALSE(multiend_call.valid()); | |
279 } | |
280 | |
281 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { | |
282 // Reject: | |
283 // A 0********* | |
284 // B 1**....... | |
285 // C ...2**.... | |
286 // A ......3**. | |
287 // ^ Turn #3 overlaps with #0 which is from the same speaker. | |
288 const std::vector<Turn> timing = { | |
289 {"A", "t1000", 0}, | |
290 {"B", "t300", -1000}, | |
291 {"C", "t300", 0}, | |
292 {"A", "t300", 0}, | |
293 }; | |
294 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
295 | |
296 // There are two unique audio tracks to read. | |
297 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
298 | |
299 conversational_speech::MultiEndCall multiend_call( | |
300 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
301 EXPECT_FALSE(multiend_call.valid()); | |
302 } | |
303 | |
304 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { | |
305 // Accept: | |
306 // A 0*********.. | |
307 // B ..1****..... | |
308 // C .......2**** | |
309 const std::vector<Turn> timing = { | |
310 {"A", "t1000", 0}, | |
311 {"B", "t500", -800}, | |
312 {"C", "t500", 0}, | |
313 }; | |
314 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
315 | |
316 // There are two unique audio tracks to read. | |
317 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
318 | |
319 conversational_speech::MultiEndCall multiend_call( | |
320 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
321 EXPECT_TRUE(multiend_call.valid()); | |
322 } | |
323 | |
324 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { | |
325 // Reject: | |
326 // A 0********* | |
327 // B ..1****... | |
328 // C ....2****. | |
329 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers | |
330 // not permitted). | |
331 const std::vector<Turn> timing = { | |
332 {"A", "t1000", 0}, | |
333 {"B", "t500", -800}, | |
334 {"C", "t500", -300}, | |
335 }; | |
336 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
337 | |
338 // There are two unique audio tracks to read. | |
339 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
340 | |
341 conversational_speech::MultiEndCall multiend_call( | |
342 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
343 EXPECT_FALSE(multiend_call.valid()); | |
344 } | |
345 | |
346 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { | |
347 // Accept: | |
348 // A 0*********.. | |
349 // B .1****...... | |
350 // C .......2**** | |
351 const std::vector<Turn> timing = { | |
352 {"A", "t1000", 0}, | |
353 {"B", "t500", -900}, | |
354 {"C", "t500", 100}, | |
355 }; | |
356 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
357 | |
358 // There are two unique audio tracks to read. | |
359 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
360 | |
361 conversational_speech::MultiEndCall multiend_call( | |
362 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
363 EXPECT_TRUE(multiend_call.valid()); | |
364 } | |
365 | |
366 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { | |
367 // Accept: | |
368 // A 0**** | |
369 // B 1**** | |
370 const std::vector<Turn> timing = { | |
371 {"A", "t500", 0}, | |
372 {"B", "t500", -500}, | |
373 }; | |
374 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
375 | |
376 // There is one unique audio track to read. | |
377 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
378 | |
379 conversational_speech::MultiEndCall multiend_call( | |
380 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
381 EXPECT_TRUE(multiend_call.valid()); | |
382 } | |
383 | |
384 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { | |
385 // Accept: | |
386 // A 0****....3****.5**... | |
387 // B .....1****...4**..... | |
388 // C ......2**.......6**.. | |
389 const std::vector<Turn> timing = { | |
390 {"A", "t500", 0}, | |
391 {"B", "t500", 0}, | |
392 {"C", "t300", -400}, | |
393 {"A", "t500", 0}, | |
394 {"B", "t300", -100}, | |
395 {"A", "t300", -100}, | |
396 {"C", "t300", -200}, | |
397 }; | |
398 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | |
399 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
400 kDefaultMockWavReaderFactoryParamsMap)); | |
401 | |
402 // There are two unique audio tracks to read. | |
403 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
404 | |
405 conversational_speech::MultiEndCall multiend_call( | |
406 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
407 EXPECT_TRUE(multiend_call.valid()); | |
408 } | |
409 | |
410 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { | |
411 // Reject: | |
412 // A 0****....3****5**. | |
413 // B .....1****...4**.. | |
414 // C ......2**......6** | |
415 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ | |
416 // speakers not permitted). | |
417 const std::vector<Turn> timing = { | |
418 {"A", "t500", 0}, | |
419 {"B", "t500", 0}, | |
420 {"C", "t300", -400}, | |
421 {"A", "t500", 0}, | |
422 {"B", "t300", -100}, | |
423 {"A", "t300", -200}, | |
424 {"C", "t300", -200}, | |
425 }; | |
426 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | |
427 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
428 kDefaultMockWavReaderFactoryParamsMap)); | |
429 | |
430 // There are two unique audio tracks to read. | |
431 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
432 | |
433 conversational_speech::MultiEndCall multiend_call( | |
434 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
435 EXPECT_FALSE(multiend_call.valid()); | |
436 } | |
437 | |
95 } // namespace test | 438 } // namespace test |
96 } // namespace webrtc | 439 } // namespace webrtc |
OLD | NEW |