Chromium Code Reviews| Index: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
| diff --git a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
| index 59454d9d47d26c864b5b1350c0c0ddde2b5f7d58..1b06b0ae9b62799803f6d803ca04a9c111a0c3ce 100644 |
| --- a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
| +++ b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
| @@ -8,9 +8,36 @@ |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| +// This file consists of unit tests for webrtc::test::conversational_speech |
| +// members. Part of the tests focus on accepting or rejecting different |
| +// conversational speech setups. A setup is defined by a set of audio tracks and |
| +// timing information. |
| +// The docstring at the beginning of each TEST_F(ConversationalSpeechTest, |
| +// MultiEndCallSetup*) function looks like the drawing below and indicates which |
| +// setup is tested. |
| +// |
| +// Accept: |
| +// A 0****..... |
| +// B .....1**** |
| +// |
| +// The drawing indicates the following: |
| +// - the illustrated setup should be accepted, |
| +// - there are two speakers (namely, A and B), |
| +// - A speaks first, B speaks second, |
| +// - each character after the speaker's letter indicates a time unit (e.g., 100 |
| +// ms), |
| +// - "*" indicates speaking, "." listening, |
| +// - numbers indicate the turn index in std::vector<Turn>. |
| +// |
| +// Note that the same speaker can appear in multiple lines in order to depict |
| +// cases in which there are wrong offsets leading to self cross-talk (which is |
| +// rejected). |
| + |
| #include <stdio.h> |
| +#include <map> |
| #include <memory> |
| +#include "webrtc/base/logging.h" |
| #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
| #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" |
| #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" |
| @@ -44,9 +71,40 @@ const std::vector<Turn> expected_timing = { |
| }; |
| const std::size_t kNumberOfTurns = expected_timing.size(); |
| +// Fake audio track parameters. |
| +const MockWavReaderFactory::Params kMockWavReaderFactoryParams300ms = |
| + {48000, 1u, 14400u}; // 48kHz sample rate, mono, 0.3 seconds. |
| +const MockWavReaderFactory::Params kMockWavReaderFactoryParams500ms = |
| + {48000, 1u, 24000u}; // 48kHz sample rate, mono, 0.5 seconds. |
| +const MockWavReaderFactory::Params kMockWavReaderFactoryParams1000ms = |
| + {48000, 1u, 48000u}; // 48kHz sample rate, mono, 1 second. |
| + |
| +// Default arguments for MockWavReaderFactory ctor. |
| +const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = |
| + kMockWavReaderFactoryParams500ms; |
| +const std::map<std::string, const MockWavReaderFactory::Params> |
| + kDefaultMockWavReaderFactoryParamsMap = { |
| + {"t300", kMockWavReaderFactoryParams300ms}, |
| + {"t500", kMockWavReaderFactoryParams500ms}, |
| + {"t1000", kMockWavReaderFactoryParams1000ms}, |
|
AleBzk
2017/03/28 13:11:10
t300, t500 and t1000 will be used as fake audio tr
|
| +}; |
| + |
| +std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { |
| + return std::unique_ptr<MockWavReaderFactory>( |
| + new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, |
| + kDefaultMockWavReaderFactoryParamsMap)); |
| +} |
| + |
| } // namespace |
| -TEST(ConversationalSpeechTest, Settings) { |
| +class ConversationalSpeechTest : public testing::Test { |
| + public: |
| + ConversationalSpeechTest() { |
| + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); |
| + } |
| +}; |
| + |
| +TEST_F(ConversationalSpeechTest, Settings) { |
| const conversational_speech::Config config( |
| audiotracks_path, timing_filepath, output_path); |
| @@ -56,7 +114,7 @@ TEST(ConversationalSpeechTest, Settings) { |
| EXPECT_EQ(output_path, config.output_path()); |
| } |
| -TEST(ConversationalSpeechTest, TimingSaveLoad) { |
| +TEST_F(ConversationalSpeechTest, TimingSaveLoad) { |
| // Save test timing. |
| const std::string temporary_filepath = webrtc::test::TempFilename( |
| webrtc::test::OutputPath(), "TempTimingTestFile"); |
| @@ -76,9 +134,8 @@ TEST(ConversationalSpeechTest, TimingSaveLoad) { |
| } |
| } |
| -TEST(ConversationalSpeechTest, MultiEndCallCreate) { |
| - auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>( |
| - new MockWavReaderFactory()); |
| +TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| // There are 5 unique audio tracks to read. |
| EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(5); |
| @@ -86,11 +143,297 @@ TEST(ConversationalSpeechTest, MultiEndCallCreate) { |
| // Inject the mock wav reader factory. |
| conversational_speech::MultiEndCall multiend_call( |
| expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| // Test. |
| EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
| EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
| } |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) { |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", -100}, |
| + {"B", "t500", 0}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupSimple) { |
| + // Accept: |
| + // A 0****..... |
| + // B .....1**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", 0}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupPause) { |
| + // Accept: |
| + // A 0****....... |
| + // B .......1**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", 200}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { |
| + // Accept: |
| + // A 0****.... |
| + // B ....1**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", -100}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { |
| + // Reject: |
| + // A ..0**** |
| + // B .1****. The n-th turn cannot start before the (n-1)-th one. |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 200}, |
| + {"B", "t500", -600}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { |
| + // Accept: |
| + // A 0****2****... |
| + // B ...1********* |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t1000", -200}, |
| + {"A", "t500", -800}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) { |
| + // Reject: |
| + // A 0****...... |
| + // A ...1****... |
| + // B ......2**** |
| + // ^ Turn #1 overlaps with #0 which is from the same speaker. |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"A", "t500", -200}, |
| + {"B", "t500", -200}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) { |
| + // Reject: |
| + // A 0********* |
| + // B 1**....... |
| + // C ...2**.... |
| + // A ......3**. |
| + // ^ Turn #3 overlaps with #0 which is from the same speaker. |
| + const std::vector<Turn> timing = { |
| + {"A", "t1000", 0}, |
| + {"B", "t300", -1000}, |
| + {"C", "t300", 0}, |
| + {"A", "t300", 0}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) { |
| + // Accept: |
| + // A 0*********.. |
| + // B ..1****..... |
| + // C .......2**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t1000", 0}, |
| + {"B", "t500", -800}, |
| + {"C", "t500", 0}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) { |
| + // Reject: |
| + // A 0********* |
| + // B ..1****... |
| + // C ....2****. |
| + // ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers |
| + // not permitted). |
| + const std::vector<Turn> timing = { |
| + {"A", "t1000", 0}, |
| + {"B", "t500", -800}, |
| + {"C", "t500", -300}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) { |
| + // Accept: |
| + // A 0*********.. |
| + // B .1****...... |
| + // C .......2**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t1000", 0}, |
| + {"B", "t500", -900}, |
| + {"C", "t500", 100}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) { |
| + // Accept: |
| + // A 0**** |
| + // B 1**** |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", -500}, |
| + }; |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There is one unique audio track to read. |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequence) { |
| + // Accept: |
| + // A 0****....3****.5**... |
| + // B .....1****...4**..... |
| + // C ......2**.......6**.. |
| + // Seven turns from three speakers; as drawn, no more than two speakers |
| + // talk at the same time and no speaker overlaps with itself. |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", 0}, |
| + {"C", "t300", -400}, |
| + {"A", "t500", 0}, |
| + {"B", "t300", -100}, |
| + {"A", "t300", -100}, |
| + {"C", "t300", -200}, |
| + }; |
| + // Use the shared helper so that the mock factory is configured exactly as |
| + // in the other MultiEndCallSetup* tests. |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read (t500 and t300). |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_TRUE(multiend_call.valid()); |
| +} |
| + |
| +TEST_F(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) { |
| + // Reject: |
| + // A 0****....3****5**. |
| + // B .....1****...4**.. |
| + // C ......2**......6** |
| + // ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+ |
| + // speakers not permitted). |
| + // Same turns as in MultiEndCallSetupLongSequence, except that turn #5 uses |
| + // a -200 ms offset instead of -100 ms. |
| + const std::vector<Turn> timing = { |
| + {"A", "t500", 0}, |
| + {"B", "t500", 0}, |
| + {"C", "t300", -400}, |
| + {"A", "t500", 0}, |
| + {"B", "t300", -100}, |
| + {"A", "t300", -200}, |
| + {"C", "t300", -200}, |
| + }; |
| + // Use the shared helper so that the mock factory is configured exactly as |
| + // in the other MultiEndCallSetup* tests. |
| + auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| + |
| + // There are two unique audio tracks to read (t500 and t300). |
| + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| + |
| + conversational_speech::MultiEndCall multiend_call( |
| + timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| + EXPECT_FALSE(multiend_call.valid()); |
| +} |
| + |
| } // namespace test |
| } // namespace webrtc |