OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 22 matching lines...) |
33 // cases in which there are wrong offsets leading to self cross-talk (which is | 33 // cases in which there are wrong offsets leading to self cross-talk (which is |
34 // rejected). | 34 // rejected). |
35 | 35 |
36 // MSVC++ requires this to be set before any other includes to get M_PI. | 36 // MSVC++ requires this to be set before any other includes to get M_PI. |
37 #define _USE_MATH_DEFINES | 37 #define _USE_MATH_DEFINES |
38 | 38 |
39 #include <stdio.h> | 39 #include <stdio.h> |
40 #include <cmath> | 40 #include <cmath> |
41 #include <map> | 41 #include <map> |
42 #include <memory> | 42 #include <memory> |
43 #include <vector> | |
44 | 43 |
45 #include "webrtc/base/logging.h" | 44 #include "webrtc/base/logging.h" |
46 #include "webrtc/base/optional.h" | |
47 #include "webrtc/base/pathutils.h" | 45 #include "webrtc/base/pathutils.h" |
48 #include "webrtc/common_audio/wav_file.h" | 46 #include "webrtc/common_audio/wav_file.h" |
49 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" | 47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
50 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" | 48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" |
51 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" | 49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" |
52 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h" | |
53 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" | 50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" |
54 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h" | 51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h" |
55 #include "webrtc/test/gmock.h" | 52 #include "webrtc/test/gmock.h" |
56 #include "webrtc/test/gtest.h" | 53 #include "webrtc/test/gtest.h" |
57 #include "webrtc/test/testsupport/fileutils.h" | 54 #include "webrtc/test/testsupport/fileutils.h" |
58 | 55 |
59 namespace webrtc { | 56 namespace webrtc { |
60 namespace test { | 57 namespace test { |
61 namespace { | 58 namespace { |
62 | 59 |
(...skipping 16 matching lines...) |
79 {"A", "a3", 0}, | 76 {"A", "a3", 0}, |
80 {"A", "a3", 0}, | 77 {"A", "a3", 0}, |
81 }; | 78 }; |
82 const std::size_t kNumberOfTurns = expected_timing.size(); | 79 const std::size_t kNumberOfTurns = expected_timing.size(); |
83 | 80 |
84 // Default arguments for MockWavReaderFactory ctor. | 81 // Default arguments for MockWavReaderFactory ctor. |
85 // Fake audio track parameters. | 82 // Fake audio track parameters. |
86 constexpr int kDefaultSampleRate = 48000; | 83 constexpr int kDefaultSampleRate = 48000; |
87 const std::map<std::string, const MockWavReaderFactory::Params> | 84 const std::map<std::string, const MockWavReaderFactory::Params> |
88 kDefaultMockWavReaderFactoryParamsMap = { | 85 kDefaultMockWavReaderFactoryParamsMap = { |
89 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds. | 86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. |
90 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds. | 87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. |
91 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds. | 88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. |
92 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second. | |
93 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second. | |
94 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo. | |
95 }; | 89 }; |
96 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = | 90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = |
97 kDefaultMockWavReaderFactoryParamsMap.at("t500"); | 91 kDefaultMockWavReaderFactoryParamsMap.at("t500"); |
98 | 92 |
99 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { | 93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { |
100 return std::unique_ptr<MockWavReaderFactory>( | 94 return std::unique_ptr<MockWavReaderFactory>( |
101 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, | 95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, |
102 kDefaultMockWavReaderFactoryParamsMap)); | 96 kDefaultMockWavReaderFactoryParamsMap)); |
103 } | 97 } |
104 | 98 |
105 void CreateSineWavFile(const std::string& filepath, | 99 void CreateSineWavFile(const std::string& filepath, |
106 const MockWavReaderFactory::Params& params, | 100 const MockWavReaderFactory::Params& params, |
107 float frequency = 440.0f) { | 101 float frequency = 440.0f) { |
108 // Create samples. | 102 // Create samples. |
109 constexpr double two_pi = 2.0 * M_PI; | 103 constexpr double two_pi = 2.0 * M_PI; |
110 std::vector<int16_t> samples(params.num_samples); | 104 std::vector<int16_t> samples(params.num_samples); |
111 for (std::size_t i = 0; i < params.num_samples; ++i) { | 105 for (std::size_t i = 0; i < params.num_samples; ++i) { |
112 // TODO(alessiob): the produced tone is not pure, improve. | 106 // TODO(alessiob): the produced tone is not pure, improve. |
113 samples[i] = std::lround(32767.0f * std::sin( | 107 samples[i] = std::lround(32767.0f * std::sin( |
114 two_pi * i * frequency / params.sample_rate)); | 108 two_pi * i * frequency / params.sample_rate)); |
115 } | 109 } |
116 | 110 |
117 // Write samples. | 111 // Write samples. |
118 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); | 112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); |
119 wav_writer.WriteSamples(samples.data(), params.num_samples); | 113 wav_writer.WriteSamples(samples.data(), params.num_samples); |
120 } | 114 } |
121 | 115 |
122 // Parameters to generate audio tracks with CreateSineWavFile. | |
123 struct SineAudioTrackParams { | |
124 MockWavReaderFactory::Params params; | |
125 float frequency; | |
126 }; | |
127 | |
128 // Creates a temporary directory in which sine audio tracks are written. | |
129 std::string CreateTemporarySineAudioTracks( | |
130 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) { | |
131 // Create temporary directory. | |
132 rtc::Pathname temp_directory(OutputPath()); | |
133 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks"); | |
134 CreateDir(temp_directory.pathname()); | |
135 | |
136 // Create sine tracks. | |
137 for (const auto& it : sine_tracks_params) { | |
138 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first); | |
139 CreateSineWavFile( | |
140 temp_filepath.pathname(), it.second.params, it.second.frequency); | |
141 } | |
142 | |
143 return temp_directory.pathname(); | |
144 } | |
145 | |
146 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory, | |
147 const std::string& filepath, | |
148 const MockWavReaderFactory::Params& expeted_params) { | |
149 auto wav_reader = wav_reader_factory.Create(filepath); | |
150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate()); | |
151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels()); | |
152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples()); | |
153 } | |
154 | |
155 void DeleteFolderAndContents(const std::string& dir) { | |
156 if (!DirExists(dir)) { return; } | |
157 rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir); | |
158 EXPECT_TRUE(dir_content); | |
159 for (const auto& path : *dir_content) { | |
160 if (DirExists(path)) { | |
161 DeleteFolderAndContents(path); | |
162 } else if (FileExists(path)) { | |
163 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed. | |
164 RemoveFile(path); | |
165 } else { | |
166 FAIL(); | |
167 } | |
168 } | |
169 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed. | |
170 RemoveDir(dir); | |
171 } | |
172 | |
173 } // namespace | 116 } // namespace |
174 | 117 |
175 using testing::_; | 118 using testing::_; |
176 | 119 |
177 // TODO(alessiob): Remove fixture once conversational_speech fully implemented | 120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented |
178 // and replace TEST_F with TEST. | 121 // and replace TEST_F with TEST. |
179 class ConversationalSpeechTest : public testing::Test { | 122 class ConversationalSpeechTest : public testing::Test { |
180 public: | 123 public: |
181 ConversationalSpeechTest() { | 124 ConversationalSpeechTest() { |
182 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); | 125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); |
183 } | 126 } |
184 }; | 127 }; |
185 | 128 |
186 TEST_F(ConversationalSpeechTest, Settings) { | 129 TEST_F(ConversationalSpeechTest, Settings) { |
187 const conversational_speech::Config config( | 130 const conversational_speech::Config config( |
188 audiotracks_path, timing_filepath, output_path); | 131 audiotracks_path, timing_filepath, output_path); |
189 | 132 |
190 // Test getters. | 133 // Test getters. |
191 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); | 134 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); |
192 EXPECT_EQ(timing_filepath, config.timing_filepath()); | 135 EXPECT_EQ(timing_filepath, config.timing_filepath()); |
193 EXPECT_EQ(output_path, config.output_path()); | 136 EXPECT_EQ(output_path, config.output_path()); |
194 } | 137 } |
195 | 138 |
196 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { | 139 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { |
197 // Save test timing. | 140 // Save test timing. |
198 const std::string temporary_filepath = TempFilename( | 141 const std::string temporary_filepath = webrtc::test::TempFilename( |
199 OutputPath(), "TempTimingTestFile"); | 142 webrtc::test::OutputPath(), "TempTimingTestFile"); |
200 SaveTiming(temporary_filepath, expected_timing); | 143 SaveTiming(temporary_filepath, expected_timing); |
201 | 144 |
202 // Create a std::vector<Turn> instance by loading from file. | 145 // Create a std::vector<Turn> instance by loading from file. |
203 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); | 146 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); |
204 std::remove(temporary_filepath.c_str()); | 147 std::remove(temporary_filepath.c_str()); |
205 | 148 |
206 // Check size. | 149 // Check size. |
207 EXPECT_EQ(expected_timing.size(), actual_timing.size()); | 150 EXPECT_EQ(expected_timing.size(), actual_timing.size()); |
208 | 151 |
209 // Check Turn instances. | 152 // Check Turn instances. |
(...skipping 13 matching lines...) |
223 conversational_speech::MultiEndCall multiend_call( | 166 conversational_speech::MultiEndCall multiend_call( |
224 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); | 167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); |
225 EXPECT_TRUE(multiend_call.valid()); | 168 EXPECT_TRUE(multiend_call.valid()); |
226 | 169 |
227 // Test. | 170 // Test. |
228 EXPECT_EQ(2u, multiend_call.speaker_names().size()); | 171 EXPECT_EQ(2u, multiend_call.speaker_names().size()); |
229 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); | 172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); |
230 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); | 173 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); |
231 } | 174 } |
232 | 175 |
233 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { | |
234 const std::vector<Turn> timing = { | |
235 {"A", "sr8000", 0}, | |
236 {"B", "sr16000", 0}, | |
237 }; | |
238 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
239 | |
240 // There are two unique audio tracks to read. | |
241 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
242 | |
243 MultiEndCall multiend_call( | |
244 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
245 EXPECT_FALSE(multiend_call.valid()); | |
246 } | |
247 | |
248 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { | |
249 const std::vector<Turn> timing = { | |
250 {"A", "sr16000_stereo", 0}, | |
251 {"B", "sr16000_stereo", 0}, | |
252 }; | |
253 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
254 | |
255 // There is one unique audio track to read. | |
256 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); | |
257 | |
258 MultiEndCall multiend_call( | |
259 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
260 EXPECT_FALSE(multiend_call.valid()); | |
261 } | |
262 | |
263 TEST_F(ConversationalSpeechTest, | |
264 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { | |
265 const std::vector<Turn> timing = { | |
266 {"A", "sr8000", 0}, | |
267 {"B", "sr16000_stereo", 0}, | |
268 }; | |
269 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | |
270 | |
271 // There are two unique audio tracks to read. | |
272 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); | |
273 | |
274 MultiEndCall multiend_call( | |
275 timing, audiotracks_path, std::move(mock_wavreader_factory)); | |
276 EXPECT_FALSE(multiend_call.valid()); | |
277 } | |
278 | |
279 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { | 176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { |
280 const std::vector<Turn> timing = { | 177 const std::vector<Turn> timing = { |
281 {"A", "t500", -100}, | 178 {"A", "t500", -100}, |
282 {"B", "t500", 0}, | 179 {"B", "t500", 0}, |
283 }; | 180 }; |
284 auto mock_wavreader_factory = CreateMockWavReaderFactory(); | 181 auto mock_wavreader_factory = CreateMockWavReaderFactory(); |
285 | 182 |
286 // There is one unique audio track to read. | 183 // There is one unique audio track to read. |
287 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); | 184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); |
288 | 185 |
(...skipping 332 matching lines...) |
621 | 518 |
622 for (int sample_rate : sample_rates) { | 519 for (int sample_rate : sample_rates) { |
623 const rtc::Pathname temp_filename( | 520 const rtc::Pathname temp_filename( |
624 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) | 521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) |
625 + ".wav"); | 522 + ".wav"); |
626 | 523 |
627 // Write wav file. | 524 // Write wav file. |
628 const std::size_t num_samples = duration_seconds * sample_rate; | 525 const std::size_t num_samples = duration_seconds * sample_rate; |
629 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; | 526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; |
630 CreateSineWavFile(temp_filename.pathname(), params); | 527 CreateSineWavFile(temp_filename.pathname(), params); |
| 528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created (" |
| 529 << num_samples << " samples)"; |
631 | 530 |
632 // Load wav file and check if params match. | 531 // Load wav file and check if params match. |
633 WavReaderFactory wav_reader_factory; | 532 WavReaderFactory wav_reader_factory; |
634 MockWavReaderFactory::Params expeted_params = { | 533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname()); |
635 sample_rate, 1u, num_samples}; | 534 EXPECT_EQ(sample_rate, wav_reader->SampleRate()); |
636 CheckAudioTrackParams( | 535 EXPECT_EQ(1u, wav_reader->NumChannels()); |
637 wav_reader_factory, temp_filename.pathname(), expeted_params); | 536 EXPECT_EQ(num_samples, wav_reader->NumSamples()); |
638 | 537 |
639 // Clean up. | 538 // Clean up. |
640 remove(temp_filename.pathname().c_str()); | 539 remove(temp_filename.pathname().c_str()); |
641 } | 540 } |
642 } | 541 } |
643 | 542 |
644 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) { | |
645 // Simulated call (one character corresponding to 500 ms): | |
646 // A 0*********...........2*********..... | |
647 // B ...........1*********.....3********* | |
648 const std::vector<Turn> expected_timing = { | |
649 {"A", "t5000_440.wav", 0}, | |
650 {"B", "t5000_880.wav", 500}, | |
651 {"A", "t5000_440.wav", 0}, | |
652 {"B", "t5000_880.wav", -2500}, | |
653 }; | |
654 const std::size_t expected_duration_seconds = 18; | |
655 | |
656 // Create temporary audio track files. | |
657 const int sample_rate = 16000; | |
658 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = { | |
659 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}}, | |
660 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}}, | |
661 }; | |
662 const std::string audiotracks_path = CreateTemporarySineAudioTracks( | |
663 sine_tracks_params); | |
664 | |
665 // Set up the multi-end call. | |
666 auto wavreader_factory = std::unique_ptr<WavReaderFactory>( | |
667 new WavReaderFactory()); | |
668 MultiEndCall multiend_call( | |
669 expected_timing, audiotracks_path, std::move(wavreader_factory)); | |
670 | |
671 // Simulate the call. | |
672 rtc::Pathname output_path(audiotracks_path); | |
673 output_path.AppendFolder("output"); | |
674 CreateDir(output_path.pathname()); | |
675 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname(); | |
676 auto generated_audiotrak_pairs = conversational_speech::Simulate( | |
677 multiend_call, output_path.pathname()); | |
678 EXPECT_EQ(2u, generated_audiotrak_pairs->size()); | |
679 | |
680 // Check the output. | |
681 WavReaderFactory wav_reader_factory; | |
682 const MockWavReaderFactory::Params expeted_params = { | |
683 sample_rate, 1u, sample_rate * expected_duration_seconds}; | |
684 for (const auto& it : *generated_audiotrak_pairs) { | |
685 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">"; | |
686 CheckAudioTrackParams( | |
687 wav_reader_factory, it.second.near_end, expeted_params); | |
688 CheckAudioTrackParams( | |
689 wav_reader_factory, it.second.far_end, expeted_params); | |
690 } | |
691 | |
692 // Clean. | |
693 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path)); | |
694 } | |
695 | |
696 } // namespace test | 543 } // namespace test |
697 } // namespace webrtc | 544 } // namespace webrtc |
OLD | NEW |