Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 // This file consists of unit tests for webrtc::test::conversational_speech | |
| 12 // members. Some of them focus on accepting or rejecting different | |
| 13 // conversational speech setups. A setup is defined by a set of audio tracks and | |
| 14 // timing information. | |
| 15 // The docstring at the beginning of each TEST_F(ConversationalSpeechTest, | |
| 16 // MultiEndCallSetup*) function looks like the drawing below and indicates which | |
| 17 // setup is tested. | |
| 18 // | |
| 19 // Accept: | |
| 20 // A 0****..... | |
| 21 // B .....1**** | |
| 22 // | |
| 23 // The drawing indicates the following: | |
| 24 // - the illustrated setup should be accepted, | |
| 25 // - there are two speakers (namely, A and B), | |
| 26 // - A speaks first, B speaks second, | |
| 27 // - each character after the speaker's letter indicates a time unit (e.g., 100 | |
| 28 // ms), | |
| 29 // - "*" indicates speaking, "." listening, | |
| 30 // - numbers indicate the turn index in std::vector<Turn>. | |
| 31 // | |
| 32 // Note that the same speaker can appear in multiple lines in order to depict | |
| 33 // cases in which there are wrong offsets leading to self cross-talk (which is | |
| 34 // rejected). | |
| 35 | |
| 11 #include <stdio.h> | 36 #include <stdio.h> |
| 37 #include <map> | |
| 12 #include <memory> | 38 #include <memory> |
| 13 | 39 |
| 40 #include "webrtc/base/logging.h" | |
| 14 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" | 41 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
| 15 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" | 42 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" |
| 16 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" | 43 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" |
| 17 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" | 44 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" |
| 18 #include "webrtc/test/gmock.h" | 45 #include "webrtc/test/gmock.h" |
| 19 #include "webrtc/test/gtest.h" | 46 #include "webrtc/test/gtest.h" |
| 20 #include "webrtc/test/testsupport/fileutils.h" | 47 #include "webrtc/test/testsupport/fileutils.h" |
| 21 | 48 |
| 22 namespace webrtc { | 49 namespace webrtc { |
| 23 namespace test { | 50 namespace test { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 37 const std::vector<Turn> expected_timing = { | 64 const std::vector<Turn> expected_timing = { |
| 38 {"A", "a1", 0}, | 65 {"A", "a1", 0}, |
| 39 {"B", "b1", 0}, | 66 {"B", "b1", 0}, |
| 40 {"A", "a2", 100}, | 67 {"A", "a2", 100}, |
| 41 {"B", "b2", -200}, | 68 {"B", "b2", -200}, |
| 42 {"A", "a3", 0}, | 69 {"A", "a3", 0}, |
| 43 {"A", "a3", 0}, | 70 {"A", "a3", 0}, |
| 44 }; | 71 }; |
| 45 const std::size_t kNumberOfTurns = expected_timing.size(); | 72 const std::size_t kNumberOfTurns = expected_timing.size(); |
| 46 | 73 |
| 74 // Fake audio track parameters. | |
| 75 const int kDefaultSampleRate = 48000; | |
|
hlundin-webrtc
2017/04/06 08:10:03
constexpr
AleBzk
2017/04/06 16:42:41
Done.
| |
| 76 const std::map<std::string, const MockWavReaderFactory::Params> | |
|
hlundin-webrtc
2017/04/06 08:10:03
constexpr?
AleBzk
2017/04/06 16:42:41
Nope (error: constexpr variable cannot have non-literal type ...)
hlundin-webrtc
2017/04/07 10:24:09
Acknowledged.
| |
| 77 kDefaultMockWavReaderFactoryParamsMap = { | |
| 78 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. | |
| 79 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. | |
| 80 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. | |
| 81 }; | |
| 82 | |
| 83 // Default arguments for MockWavReaderFactory ctor. | |
|
hlundin-webrtc
2017/04/06 08:10:03
This comment should go above the preceding map, right?
AleBzk
2017/04/06 16:42:41
Done.
| |
| 84 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = | |
|
hlundin-webrtc
2017/04/06 08:10:03
constexpr?
AleBzk
2017/04/06 16:42:41
Nope because I can't use constexpr with the const
hlundin-webrtc
2017/04/07 10:24:09
Acknowledged.
| |
| 85 kDefaultMockWavReaderFactoryParamsMap.at("t500"); | |
| 86 | |
| 87 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { | |
| 88 return std::unique_ptr<MockWavReaderFactory>( | |
| 89 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
| 90 kDefaultMockWavReaderFactoryParamsMap)); | |
| 91 } | |
| 92 | |
| 47 } // namespace | 93 } // namespace |
| 48 | 94 |
| 49 TEST(ConversationalSpeechTest, Settings) { | 95 class ConversationalSpeechTest : public testing::Test { |
| 96 public: | |
| 97 ConversationalSpeechTest() { | |
| 98 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); | |
|
hlundin-webrtc
2017/04/06 08:10:03
I don't think you want to do all of this verbose l
AleBzk
2017/04/06 16:42:41
You won't see any log unless you add "--logs true"
hlundin-webrtc
2017/04/07 10:24:09
OK. Add a TODO in the code about this.
AleBzk
2017/04/07 11:37:06
Done.
| |
| 99 } | |
| 100 }; | |
| 101 | |
| 102 TEST_F(ConversationalSpeechTest, Settings) { | |
| 50 const conversational_speech::Config config( | 103 const conversational_speech::Config config( |
| 51 audiotracks_path, timing_filepath, output_path); | 104 audiotracks_path, timing_filepath, output_path); |
| 52 | 105 |
| 53 // Test getters. | 106 // Test getters. |
| 54 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); | 107 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); |
| 55 EXPECT_EQ(timing_filepath, config.timing_filepath()); | 108 EXPECT_EQ(timing_filepath, config.timing_filepath()); |
| 56 EXPECT_EQ(output_path, config.output_path()); | 109 EXPECT_EQ(output_path, config.output_path()); |
| 57 } | 110 } |
| 58 | 111 |
| 59 TEST(ConversationalSpeechTest, TimingSaveLoad) { | 112 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { |
| 60 // Save test timing. | 113 // Save test timing. |
| 61 const std::string temporary_filepath = webrtc::test::TempFilename( | 114 const std::string temporary_filepath = webrtc::test::TempFilename( |
| 62 webrtc::test::OutputPath(), "TempTimingTestFile"); | 115 webrtc::test::OutputPath(), "TempTimingTestFile"); |
| 63 SaveTiming(temporary_filepath, expected_timing); | 116 SaveTiming(temporary_filepath, expected_timing); |
| 64 | 117 |
| 65 // Create a std::vector<Turn> instance by loading from file. | 118 // Create a std::vector<Turn> instance by loading from file. |
| 66 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); | 119 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); |
| 67 std::remove(temporary_filepath.c_str()); | 120 std::remove(temporary_filepath.c_str()); |
| 68 | 121 |
| 69 // Check size. | 122 // Check size. |
| 70 EXPECT_EQ(expected_timing.size(), actual_timing.size()); | 123 EXPECT_EQ(expected_timing.size(), actual_timing.size()); |
| 71 | 124 |
| 72 // Check Turn instances. | 125 // Check Turn instances. |
| 73 for (size_t index = 0; index < expected_timing.size(); ++index) { | 126 for (size_t index = 0; index < expected_timing.size(); ++index) { |
| 74 EXPECT_EQ(expected_timing[index], actual_timing[index]) | 127 EXPECT_EQ(expected_timing[index], actual_timing[index]) |
| 75 << "turn #" << index << " not matching"; | 128 << "turn #" << index << " not matching"; |
| 76 } | 129 } |
| 77 } | 130 } |
| 78 | 131 |
| 79 TEST(ConversationalSpeechTest, MultiEndCallCreate) { | 132 TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { |
| 80 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | 133 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| 81 new MockWavReaderFactory()); | |
| 82 | 134 |
| 83 // There are 5 unique audio tracks to read. | 135 // There are 5 unique audio tracks to read. |
| 84 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); | 136 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); |
| 85 | 137 |
| 86 // Inject the mock wav reader factory. | 138 // Inject the mock wav reader factory. |
| 87 conversational_speech::MultiEndCall multiend_call( | 139 conversational_speech::MultiEndCall multiend_call( |
| 88 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); | 140 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| 141 EXPECT_TRUE(multiend_call.valid()); | |
| 89 | 142 |
| 90 // Test. | 143 // Test. |
| 91 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 144 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
| 92 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); | 145 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
| 146 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); | |
| 147 } | |
| 148 | |
| 149 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) { | |
|
hlundin-webrtc
2017/04/06 08:10:03
"NonNegative"? Seems negative to me.
AleBzk
2017/04/06 16:42:41
Sorry. I should have called it RejectNonNegative,
| |
| 150 const std::vector<Turn> timing = { | |
| 151 {"A", "t500", -100}, | |
| 152 {"B", "t500", 0}, | |
| 153 }; | |
| 154 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 155 | |
| 156 // There is one unique audio track to read. | |
| 157 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
|
hlundin-webrtc
2017/04/06 08:10:03
I suggest you do
using testing::_
to make all of t
AleBzk
2017/04/06 16:42:41
Done.
| |
| 158 | |
| 159 conversational_speech::MultiEndCall multiend_call( | |
| 160 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 161 EXPECT_FALSE(multiend_call.valid()); | |
| 162 } | |
| 163 | |
| 164 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { | |
| 165 // Accept: | |
| 166 // A 0****..... | |
| 167 // B .....1**** | |
| 168 const std::size_t expected_duration = kDefaultSampleRate; | |
|
hlundin-webrtc
2017/04/06 08:10:03
Throughout the file: const -> constexpr when possible.
AleBzk
2017/04/06 16:42:41
Done.
| |
| 169 const std::vector<Turn> timing = { | |
| 170 {"A", "t500", 0}, | |
| 171 {"B", "t500", 0}, | |
| 172 }; | |
| 173 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 174 | |
| 175 // There is one unique audio track to read. | |
| 176 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 177 | |
| 178 conversational_speech::MultiEndCall multiend_call( | |
| 179 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 180 EXPECT_TRUE(multiend_call.valid()); | |
| 181 | |
| 182 // Test. | |
| 183 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | |
| 184 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | |
| 185 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | |
| 186 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 187 } | |
| 188 | |
| 189 TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { | |
| 190 // Accept: | |
| 191 // A 0****....... | |
| 192 // B .......1**** | |
| 193 const std::size_t expected_duration = kDefaultSampleRate * 1.2; | |
| 194 const std::vector<Turn> timing = { | |
| 195 {"A", "t500", 0}, | |
| 196 {"B", "t500", 200}, | |
| 197 }; | |
| 198 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 199 | |
| 200 // There is one unique audio track to read. | |
| 201 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 202 | |
| 203 conversational_speech::MultiEndCall multiend_call( | |
| 204 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 205 EXPECT_TRUE(multiend_call.valid()); | |
| 206 | |
| 207 // Test. | |
| 208 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | |
| 209 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | |
| 210 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | |
| 211 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 212 } | |
| 213 | |
| 214 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { | |
| 215 // Accept: | |
| 216 // A 0****.... | |
| 217 // B ....1**** | |
| 218 const std::size_t expected_duration = kDefaultSampleRate * 0.9; | |
| 219 const std::vector<Turn> timing = { | |
| 220 {"A", "t500", 0}, | |
| 221 {"B", "t500", -100}, | |
| 222 }; | |
| 223 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 224 | |
| 225 // There is one unique audio track to read. | |
| 226 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 227 | |
| 228 conversational_speech::MultiEndCall multiend_call( | |
| 229 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 230 EXPECT_TRUE(multiend_call.valid()); | |
| 231 | |
| 232 // Test. | |
| 233 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | |
| 234 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | |
| 235 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | |
| 236 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 237 } | |
| 238 | |
| 239 TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { | |
| 240 // Reject: | |
| 241 // A ..0**** | |
| 242 // B .1****. The n-th turn cannot start before the (n-1)-th one. | |
| 243 const std::vector<Turn> timing = { | |
| 244 {"A", "t500", 200}, | |
| 245 {"B", "t500", -600}, | |
| 246 }; | |
| 247 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 248 | |
| 249 // There is one unique audio track to read. | |
| 250 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 251 | |
| 252 conversational_speech::MultiEndCall multiend_call( | |
| 253 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 254 EXPECT_FALSE(multiend_call.valid()); | |
| 255 } | |
| 256 | |
| 257 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { | |
| 258 // Accept: | |
| 259 // A 0****2****... | |
| 260 // B ...1********* | |
| 261 const std::size_t expected_duration = kDefaultSampleRate * 1.3; | |
| 262 const std::vector<Turn> timing = { | |
| 263 {"A", "t500", 0}, | |
| 264 {"B", "t1000", -200}, | |
| 265 {"A", "t500", -800}, | |
| 266 }; | |
| 267 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 268 | |
| 269 // There are two unique audio tracks to read. | |
| 270 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 271 | |
| 272 conversational_speech::MultiEndCall multiend_call( | |
| 273 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 274 EXPECT_TRUE(multiend_call.valid()); | |
| 275 | |
| 276 // Test. | |
| 277 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | |
| 278 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | |
| 279 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | |
| 280 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 281 } | |
| 282 | |
| 283 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { | |
| 284 // Reject: | |
| 285 // A 0****...... | |
| 286 // A ...1****... | |
| 287 // B ......2**** | |
| 288 // ^ Turn #1 overlaps with #0 which is from the same speaker. | |
| 289 const std::vector<Turn> timing = { | |
| 290 {"A", "t500", 0}, | |
| 291 {"A", "t500", -200}, | |
| 292 {"B", "t500", -200}, | |
| 293 }; | |
| 294 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 295 | |
| 296 // There is one unique audio track to read. | |
| 297 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 298 | |
| 299 conversational_speech::MultiEndCall multiend_call( | |
| 300 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 301 EXPECT_FALSE(multiend_call.valid()); | |
| 302 } | |
| 303 | |
| 304 TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { | |
| 305 // Reject: | |
| 306 // A 0********* | |
| 307 // B 1**....... | |
| 308 // C ...2**.... | |
| 309 // A ......3**. | |
| 310 // ^ Turn #3 overlaps with #0 which is from the same speaker. | |
| 311 const std::vector<Turn> timing = { | |
| 312 {"A", "t1000", 0}, | |
| 313 {"B", "t300", -1000}, | |
| 314 {"C", "t300", 0}, | |
| 315 {"A", "t300", 0}, | |
| 316 }; | |
| 317 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 318 | |
| 319 // There are two unique audio tracks to read. | |
| 320 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 321 | |
| 322 conversational_speech::MultiEndCall multiend_call( | |
| 323 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 324 EXPECT_FALSE(multiend_call.valid()); | |
| 325 } | |
| 326 | |
| 327 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { | |
| 328 // Accept: | |
| 329 // A 0*********.. | |
| 330 // B ..1****..... | |
| 331 // C .......2**** | |
| 332 const std::size_t expected_duration = kDefaultSampleRate * 1.2; | |
| 333 const std::vector<Turn> timing = { | |
| 334 {"A", "t1000", 0}, | |
| 335 {"B", "t500", -800}, | |
| 336 {"C", "t500", 0}, | |
| 337 }; | |
| 338 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 339 | |
| 340 // There are two unique audio tracks to read. | |
| 341 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 342 | |
| 343 conversational_speech::MultiEndCall multiend_call( | |
| 344 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 345 EXPECT_TRUE(multiend_call.valid()); | |
| 346 | |
| 347 // Test. | |
| 348 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | |
| 349 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | |
| 350 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | |
| 351 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 352 } | |
| 353 | |
| 354 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { | |
| 355 // Reject: | |
| 356 // A 0********* | |
| 357 // B ..1****... | |
| 358 // C ....2****. | |
| 359 // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers | |
| 360 // not permitted). | |
| 361 const std::vector<Turn> timing = { | |
| 362 {"A", "t1000", 0}, | |
| 363 {"B", "t500", -800}, | |
| 364 {"C", "t500", -300}, | |
| 365 }; | |
| 366 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 367 | |
| 368 // There are two unique audio tracks to read. | |
| 369 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 370 | |
| 371 conversational_speech::MultiEndCall multiend_call( | |
| 372 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 373 EXPECT_FALSE(multiend_call.valid()); | |
| 374 } | |
| 375 | |
| 376 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { | |
| 377 // Accept: | |
| 378 // A 0*********.. | |
| 379 // B .1****...... | |
| 380 // C .......2**** | |
| 381 const std::size_t expected_duration = kDefaultSampleRate * 1.2; | |
| 382 const std::vector<Turn> timing = { | |
| 383 {"A", "t1000", 0}, | |
| 384 {"B", "t500", -900}, | |
| 385 {"C", "t500", 100}, | |
| 386 }; | |
| 387 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 388 | |
| 389 // There are two unique audio tracks to read. | |
| 390 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 391 | |
| 392 conversational_speech::MultiEndCall multiend_call( | |
| 393 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 394 EXPECT_TRUE(multiend_call.valid()); | |
| 395 | |
| 396 // Test. | |
| 397 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | |
| 398 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | |
| 399 EXPECT_EQ(3u, multiend_call.speaking_turns().size()); | |
| 400 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 401 } | |
| 402 | |
| 403 TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { | |
| 404 // Accept: | |
| 405 // A 0**** | |
| 406 // B 1**** | |
| 407 const std::vector<Turn> timing = { | |
| 408 {"A", "t500", 0}, | |
| 409 {"B", "t500", -500}, | |
| 410 }; | |
| 411 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
| 412 | |
| 413 // There is one unique audio track to read. | |
| 414 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
| 415 | |
| 416 conversational_speech::MultiEndCall multiend_call( | |
| 417 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 418 EXPECT_TRUE(multiend_call.valid()); | |
| 419 | |
| 420 // Test. | |
| 421 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | |
| 422 EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); | |
| 423 EXPECT_EQ(2u, multiend_call.speaking_turns().size()); | |
| 424 } | |
| 425 | |
| 426 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { | |
| 427 // Accept: | |
| 428 // A 0****....3****.5**. | |
| 429 // B .....1****...4**... | |
| 430 // C ......2**.......6**.. | |
| 431 const std::size_t expected_duration = kDefaultSampleRate * 1.9; | |
| 432 const std::vector<Turn> timing = { | |
| 433 {"A", "t500", 0}, | |
| 434 {"B", "t500", 0}, | |
| 435 {"C", "t300", -400}, | |
| 436 {"A", "t500", 0}, | |
| 437 {"B", "t300", -100}, | |
| 438 {"A", "t300", -100}, | |
| 439 {"C", "t300", -200}, | |
| 440 }; | |
| 441 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | |
| 442 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
| 443 kDefaultMockWavReaderFactoryParamsMap)); | |
| 444 | |
| 445 // There are two unique audio tracks to read. | |
| 446 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 447 | |
| 448 conversational_speech::MultiEndCall multiend_call( | |
| 449 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 450 EXPECT_TRUE(multiend_call.valid()); | |
| 451 | |
| 452 // Test. | |
| 453 EXPECT_EQ(3u, multiend_call.speaker_names().size()); | |
| 454 EXPECT_EQ(2u, multiend_call.audiotrack_readers().size()); | |
| 455 EXPECT_EQ(7u, multiend_call.speaking_turns().size()); | |
| 456 EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); | |
| 457 } | |
| 458 | |
| 459 TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { | |
| 460 // Reject: | |
| 461 // A 0****....3****.6** | |
| 462 // B .....1****...4**.. | |
| 463 // C ......2**.....5**.. | |
| 464 // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ | |
| 465 // speakers not permitted). | |
| 466 const std::vector<Turn> timing = { | |
| 467 {"A", "t500", 0}, | |
| 468 {"B", "t500", 0}, | |
| 469 {"C", "t300", -400}, | |
| 470 {"A", "t500", 0}, | |
| 471 {"B", "t300", -100}, | |
| 472 {"A", "t300", -200}, | |
| 473 {"C", "t300", -200}, | |
| 474 }; | |
| 475 auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( | |
| 476 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | |
| 477 kDefaultMockWavReaderFactoryParamsMap)); | |
| 478 | |
| 479 // There are two unique audio tracks to read. | |
| 480 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
| 481 | |
| 482 conversational_speech::MultiEndCall multiend_call( | |
| 483 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
| 484 EXPECT_FALSE(multiend_call.valid()); | |
| 93 } | 485 } |
| 94 | 486 |
| 95 } // namespace test | 487 } // namespace test |
| 96 } // namespace webrtc | 488 } // namespace webrtc |
| OLD | NEW |