OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 24 matching lines...) Expand all Loading... |
35 | 35 |
36 // MSVC++ requires this to be set before any other includes to get M_PI. | 36 // MSVC++ requires this to be set before any other includes to get M_PI. |
37 #define _USE_MATH_DEFINES | 37 #define _USE_MATH_DEFINES |
38 | 38 |
39 #include <stdio.h> | 39 #include <stdio.h> |
40 #include <cmath> | 40 #include <cmath> |
41 #include <map> | 41 #include <map> |
42 #include <memory> | 42 #include <memory> |
43 | 43 |
44 #include "webrtc/base/logging.h" | 44 #include "webrtc/base/logging.h" |
| 45 #include "webrtc/base/fileutils.h" |
45 #include "webrtc/base/pathutils.h" | 46 #include "webrtc/base/pathutils.h" |
46 #include "webrtc/common_audio/wav_file.h" | 47 #include "webrtc/common_audio/wav_file.h" |
47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" | 48 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavrea
der_factory.h" | 49 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavrea
der_factory.h" |
49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_ca
ll.h" | 50 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_ca
ll.h" |
| 51 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h
" |
50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" | 52 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" |
51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_f
actory.h" | 53 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_f
actory.h" |
52 #include "webrtc/test/gmock.h" | 54 #include "webrtc/test/gmock.h" |
53 #include "webrtc/test/gtest.h" | 55 #include "webrtc/test/gtest.h" |
54 #include "webrtc/test/testsupport/fileutils.h" | 56 #include "webrtc/test/testsupport/fileutils.h" |
55 | 57 |
56 namespace webrtc { | 58 namespace webrtc { |
57 namespace test { | 59 namespace test { |
58 namespace { | 60 namespace { |
59 | 61 |
(...skipping 16 matching lines...) Expand all Loading... |
76 {"A", "a3", 0}, | 78 {"A", "a3", 0}, |
77 {"A", "a3", 0}, | 79 {"A", "a3", 0}, |
78 }; | 80 }; |
79 const std::size_t kNumberOfTurns = expected_timing.size(); | 81 const std::size_t kNumberOfTurns = expected_timing.size(); |
80 | 82 |
81 // Default arguments for MockWavReaderFactory ctor. | 83 // Default arguments for MockWavReaderFactory ctor. |
82 // Fake audio track parameters. | 84 // Fake audio track parameters. |
83 constexpr int kDefaultSampleRate = 48000; | 85 constexpr int kDefaultSampleRate = 48000; |
84 const std::map<std::string, const MockWavReaderFactory::Params> | 86 const std::map<std::string, const MockWavReaderFactory::Params> |
85 kDefaultMockWavReaderFactoryParamsMap = { | 87 kDefaultMockWavReaderFactoryParamsMap = { |
86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. | 88 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds. |
87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. | 89 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds. |
88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. | 90 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds. |
| 91 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second. |
| 92 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second. |
| 93 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo. |
89 }; | 94 }; |
90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = | 95 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = |
91 kDefaultMockWavReaderFactoryParamsMap.at("t500"); | 96 kDefaultMockWavReaderFactoryParamsMap.at("t500"); |
92 | 97 |
93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { | 98 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { |
94 return std::unique_ptr<MockWavReaderFactory>( | 99 return std::unique_ptr<MockWavReaderFactory>( |
95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | 100 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, |
96 kDefaultMockWavReaderFactoryParamsMap)); | 101 kDefaultMockWavReaderFactoryParamsMap)); |
97 } | 102 } |
98 | 103 |
99 void CreateSineWavFile(const std::string& filepath, | 104 void CreateSineWavFile(const std::string& filepath, |
100 const MockWavReaderFactory::Params& params, | 105 const MockWavReaderFactory::Params& params, |
101 float frequency = 440.0f) { | 106 float frequency = 440.0f) { |
102 // Create samples. | 107 // Create samples. |
103 constexpr double two_pi = 2.0 * M_PI; | 108 constexpr double two_pi = 2.0 * M_PI; |
104 std::vector<int16_t> samples(params.num_samples); | 109 std::vector<int16_t> samples(params.num_samples); |
105 for (std::size_t i = 0; i < params.num_samples; ++i) { | 110 for (std::size_t i = 0; i < params.num_samples; ++i) { |
106 // TODO(alessiob): the produced tone is not pure, improve. | 111 // TODO(alessiob): the produced tone is not pure, improve. |
107 samples[i] = std::lround(32767.0f * std::sin( | 112 samples[i] = std::lround(32767.0f * std::sin( |
108 two_pi * i * frequency / params.sample_rate)); | 113 two_pi * i * frequency / params.sample_rate)); |
109 } | 114 } |
110 | 115 |
111 // Write samples. | 116 // Write samples. |
112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); | 117 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); |
113 wav_writer.WriteSamples(samples.data(), params.num_samples); | 118 wav_writer.WriteSamples(samples.data(), params.num_samples); |
114 } | 119 } |
115 | 120 |
| 121 // Parameters to generate audio tracks with CreateSineWavFile. |
| 122 struct SineAudioTrackParams { |
| 123 MockWavReaderFactory::Params params; |
| 124 float frequency; |
| 125 }; |
| 126 |
| 127 // Creates a temporary directory in which sine audio tracks are written. |
| 128 std::string CreateTemporarySineAudioTracks( |
| 129 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) { |
| 130 // Create temporary directory. |
| 131 rtc::Pathname temp_directory(OutputPath()); |
| 132 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks"); |
| 133 webrtc::test::CreateDir(temp_directory.pathname()); |
| 134 |
| 135 // Create sine tracks. |
| 136 for (const auto& it : sine_tracks_params) { |
| 137 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first); |
| 138 CreateSineWavFile( |
| 139 temp_filepath.pathname(), it.second.params, it.second.frequency); |
| 140 } |
| 141 |
| 142 return temp_directory.pathname(); |
| 143 } |
| 144 |
| 145 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory, |
| 146 const std::string& filepath, |
| 147 const MockWavReaderFactory::Params& expeted_params) { |
| 148 auto wav_reader = wav_reader_factory.Create(filepath); |
| 149 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate()); |
| 150 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels()); |
| 151 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples()); |
| 152 } |
| 153 |
116 } // namespace | 154 } // namespace |
117 | 155 |
118 using testing::_; | 156 using testing::_; |
119 | 157 |
120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented | 158 // TODO(alessiob): Remove fixture once conversational_speech fully implemented |
121 // and replace TEST_F with TEST. | 159 // and replace TEST_F with TEST. |
122 class ConversationalSpeechTest : public testing::Test { | 160 class ConversationalSpeechTest : public testing::Test { |
123 public: | 161 public: |
124 ConversationalSpeechTest() { | 162 ConversationalSpeechTest() { |
125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); | 163 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 conversational_speech::MultiEndCall multiend_call( | 204 conversational_speech::MultiEndCall multiend_call( |
167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); | 205 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
168 EXPECT_TRUE(multiend_call.valid()); | 206 EXPECT_TRUE(multiend_call.valid()); |
169 | 207 |
170 // Test. | 208 // Test. |
171 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 209 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); | 210 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
173 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); | 211 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); |
174 } | 212 } |
175 | 213 |
| 214 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { |
| 215 const std::vector<Turn> timing = { |
| 216 {"A", "sr8000", 0}, |
| 217 {"B", "sr16000", 0}, |
| 218 }; |
| 219 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| 220 |
| 221 // There are two unique audio tracks to read. |
| 222 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| 223 |
| 224 MultiEndCall multiend_call( |
| 225 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| 226 EXPECT_FALSE(multiend_call.valid()); |
| 227 } |
| 228 |
| 229 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { |
| 230 const std::vector<Turn> timing = { |
| 231 {"A", "sr16000_stereo", 0}, |
| 232 {"B", "sr16000_stereo", 0}, |
| 233 }; |
| 234 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| 235 |
| 236 // There is one unique audio track to read. |
| 237 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); |
| 238 |
| 239 MultiEndCall multiend_call( |
| 240 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| 241 EXPECT_FALSE(multiend_call.valid()); |
| 242 } |
| 243 |
| 244 TEST_F(ConversationalSpeechTest, |
| 245 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { |
| 246 const std::vector<Turn> timing = { |
| 247 {"A", "sr8000", 0}, |
| 248 {"B", "sr16000_stereo", 0}, |
| 249 }; |
| 250 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
| 251 |
| 252 // There are two unique audio tracks to read. |
| 253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); |
| 254 |
| 255 MultiEndCall multiend_call( |
| 256 timing, audiotracks_path, std::move(mock_wavreader_factory)); |
| 257 EXPECT_FALSE(multiend_call.valid()); |
| 258 } |
| 259 |
176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { | 260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { |
177 const std::vector<Turn> timing = { | 261 const std::vector<Turn> timing = { |
178 {"A", "t500", -100}, | 262 {"A", "t500", -100}, |
179 {"B", "t500", 0}, | 263 {"B", "t500", 0}, |
180 }; | 264 }; |
181 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 265 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
182 | 266 |
183 // There is one unique audio track to read. | 267 // There is one unique audio track to read. |
184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 268 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
185 | 269 |
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
518 | 602 |
519 for (int sample_rate : sample_rates) { | 603 for (int sample_rate : sample_rates) { |
520 const rtc::Pathname temp_filename( | 604 const rtc::Pathname temp_filename( |
521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) | 605 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) |
522 + ".wav"); | 606 + ".wav"); |
523 | 607 |
524 // Write wav file. | 608 // Write wav file. |
525 const std::size_t num_samples = duration_seconds * sample_rate; | 609 const std::size_t num_samples = duration_seconds * sample_rate; |
526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; | 610 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; |
527 CreateSineWavFile(temp_filename.pathname(), params); | 611 CreateSineWavFile(temp_filename.pathname(), params); |
528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created (" | |
529 << num_samples << " samples)"; | |
530 | 612 |
531 // Load wav file and check if params match. | 613 // Load wav file and check if params match. |
532 WavReaderFactory wav_reader_factory; | 614 WavReaderFactory wav_reader_factory; |
533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname()); | 615 MockWavReaderFactory::Params expeted_params = { |
534 EXPECT_EQ(sample_rate, wav_reader->SampleRate()); | 616 sample_rate, 1u, num_samples}; |
535 EXPECT_EQ(1u, wav_reader->NumChannels()); | 617 CheckAudioTrackParams( |
536 EXPECT_EQ(num_samples, wav_reader->NumSamples()); | 618 wav_reader_factory, temp_filename.pathname(), expeted_params); |
537 | 619 |
538 // Clean up. | 620 // Clean up. |
539 remove(temp_filename.pathname().c_str()); | 621 remove(temp_filename.pathname().c_str()); |
540 } | 622 } |
541 } | 623 } |
542 | 624 |
| 625 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) { |
| 626 // Simulated call (one character corresponding to 500 ms): |
| 627 // A 0*********...........2*********..... |
| 628 // B ...........1*********.....3********* |
| 629 const std::vector<Turn> expected_timing = { |
| 630 {"A", "t5000_440.wav", 0}, |
| 631 {"B", "t5000_880.wav", 500}, |
| 632 {"A", "t5000_440.wav", 0}, |
| 633 {"B", "t5000_880.wav", -2500}, |
| 634 }; |
| 635 const std::size_t expected_duration_seconds = 18; |
| 636 |
| 637 // Create temporary audio track files. |
| 638 const int sample_rate = 16000; |
| 639 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = { |
| 640 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}}, |
| 641 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}}, |
| 642 }; |
| 643 const std::string audiotracks_path = CreateTemporarySineAudioTracks( |
| 644 sine_tracks_params); |
| 645 |
| 646 // Set up the multi-end call. |
| 647 auto wavreader_factory = std::unique_ptr<WavReaderFactory>( |
| 648 new WavReaderFactory()); |
| 649 MultiEndCall multiend_call( |
| 650 expected_timing, audiotracks_path, std::move(wavreader_factory)); |
| 651 |
| 652 // Simulate the call. |
| 653 rtc::Pathname output_path(audiotracks_path); |
| 654 output_path.AppendFolder("output"); |
| 655 webrtc::test::CreateDir(output_path.pathname()); |
| 656 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname(); |
| 657 auto generated_audiotrak_pairs = conversational_speech::Simulate( |
| 658 multiend_call, output_path.pathname()); |
| 659 EXPECT_EQ(2u, generated_audiotrak_pairs->size()); |
| 660 |
| 661 // Check the output. |
| 662 WavReaderFactory wav_reader_factory; |
| 663 const MockWavReaderFactory::Params expeted_params = { |
| 664 sample_rate, 1u, sample_rate * expected_duration_seconds}; |
| 665 for (const auto& it : *generated_audiotrak_pairs) { |
| 666 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">"; |
| 667 CheckAudioTrackParams( |
| 668 wav_reader_factory, it.second.near_end, expeted_params); |
| 669 CheckAudioTrackParams( |
| 670 wav_reader_factory, it.second.far_end, expeted_params); |
| 671 } |
| 672 |
| 673 // Clean. |
| 674 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents( |
| 675 rtc::Pathname(audiotracks_path))) |
| 676 << "Cannot delete temporary data directory " << audiotracks_path; |
| 677 } |
| 678 |
543 } // namespace test | 679 } // namespace test |
544 } // namespace webrtc | 680 } // namespace webrtc |
OLD | NEW |