Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2930853002: Reland of Conversational speech tool, simulator + unit tests (Closed)
Patch Set: merge Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 22 matching lines...) Expand all
33 // cases in which there are wrong offsets leading to self cross-talk (which is 33 // cases in which there are wrong offsets leading to self cross-talk (which is
34 // rejected). 34 // rejected).
35 35
36 // MSVC++ requires this to be set before any other includes to get M_PI. 36 // MSVC++ requires this to be set before any other includes to get M_PI.
37 #define _USE_MATH_DEFINES 37 #define _USE_MATH_DEFINES
38 38
39 #include <stdio.h> 39 #include <stdio.h>
40 #include <cmath> 40 #include <cmath>
41 #include <map> 41 #include <map>
42 #include <memory> 42 #include <memory>
43 #include <vector>
43 44
44 #include "webrtc/base/logging.h" 45 #include "webrtc/base/logging.h"
46 #include "webrtc/base/optional.h"
45 #include "webrtc/base/pathutils.h" 47 #include "webrtc/base/pathutils.h"
46 #include "webrtc/common_audio/wav_file.h" 48 #include "webrtc/common_audio/wav_file.h"
47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" 49 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" 50 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 51 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
52 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"
50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" 53 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h" 54 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"
52 #include "webrtc/test/gmock.h" 55 #include "webrtc/test/gmock.h"
53 #include "webrtc/test/gtest.h" 56 #include "webrtc/test/gtest.h"
54 #include "webrtc/test/testsupport/fileutils.h" 57 #include "webrtc/test/testsupport/fileutils.h"
55 58
56 namespace webrtc { 59 namespace webrtc {
57 namespace test { 60 namespace test {
58 namespace { 61 namespace {
59 62
(...skipping 16 matching lines...) Expand all
76 {"A", "a3", 0}, 79 {"A", "a3", 0},
77 {"A", "a3", 0}, 80 {"A", "a3", 0},
78 }; 81 };
79 const std::size_t kNumberOfTurns = expected_timing.size(); 82 const std::size_t kNumberOfTurns = expected_timing.size();
80 83
81 // Default arguments for MockWavReaderFactory ctor. 84 // Default arguments for MockWavReaderFactory ctor.
82 // Fake audio track parameters. 85 // Fake audio track parameters.
83 constexpr int kDefaultSampleRate = 48000; 86 constexpr int kDefaultSampleRate = 48000;
84 const std::map<std::string, const MockWavReaderFactory::Params> 87 const std::map<std::string, const MockWavReaderFactory::Params>
85 kDefaultMockWavReaderFactoryParamsMap = { 88 kDefaultMockWavReaderFactoryParamsMap = {
86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. 89 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.
87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. 90 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.
88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. 91 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.
92 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.
93 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.
94 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.
89 }; 95 };
90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = 96 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
91 kDefaultMockWavReaderFactoryParamsMap.at("t500"); 97 kDefaultMockWavReaderFactoryParamsMap.at("t500");
92 98
93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { 99 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
94 return std::unique_ptr<MockWavReaderFactory>( 100 return std::unique_ptr<MockWavReaderFactory>(
95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, 101 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
96 kDefaultMockWavReaderFactoryParamsMap)); 102 kDefaultMockWavReaderFactoryParamsMap));
97 } 103 }
98 104
99 void CreateSineWavFile(const std::string& filepath, 105 void CreateSineWavFile(const std::string& filepath,
100 const MockWavReaderFactory::Params& params, 106 const MockWavReaderFactory::Params& params,
101 float frequency = 440.0f) { 107 float frequency = 440.0f) {
102 // Create samples. 108 // Create samples.
103 constexpr double two_pi = 2.0 * M_PI; 109 constexpr double two_pi = 2.0 * M_PI;
104 std::vector<int16_t> samples(params.num_samples); 110 std::vector<int16_t> samples(params.num_samples);
105 for (std::size_t i = 0; i < params.num_samples; ++i) { 111 for (std::size_t i = 0; i < params.num_samples; ++i) {
106 // TODO(alessiob): the produced tone is not pure, improve. 112 // TODO(alessiob): the produced tone is not pure, improve.
107 samples[i] = std::lround(32767.0f * std::sin( 113 samples[i] = std::lround(32767.0f * std::sin(
108 two_pi * i * frequency / params.sample_rate)); 114 two_pi * i * frequency / params.sample_rate));
109 } 115 }
110 116
111 // Write samples. 117 // Write samples.
112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); 118 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
113 wav_writer.WriteSamples(samples.data(), params.num_samples); 119 wav_writer.WriteSamples(samples.data(), params.num_samples);
114 } 120 }
115 121
122 // Parameters to generate audio tracks with CreateSineWavFile.
123 struct SineAudioTrackParams {
124 MockWavReaderFactory::Params params;
125 float frequency;
126 };
127
128 // Creates a temporary directory in which sine audio tracks are written.
129 std::string CreateTemporarySineAudioTracks(
130 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
131 // Create temporary directory.
132 rtc::Pathname temp_directory(OutputPath());
133 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");
134 CreateDir(temp_directory.pathname());
135
136 // Create sine tracks.
137 for (const auto& it : sine_tracks_params) {
138 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);
139 CreateSineWavFile(
140 temp_filepath.pathname(), it.second.params, it.second.frequency);
141 }
142
143 return temp_directory.pathname();
144 }
145
146 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
147 const std::string& filepath,
148 const MockWavReaderFactory::Params& expeted_params) {
149 auto wav_reader = wav_reader_factory.Create(filepath);
150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());
151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());
152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());
153 }
154
155 void DeleteFolderAndContents(const std::string& dir) {
156 if (!DirExists(dir)) { return; }
157 rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir);
158 EXPECT_TRUE(dir_content);
159 for (const auto& path : *dir_content) {
160 if (DirExists(path)) {
161 DeleteFolderAndContents(path);
162 } else if (FileExists(path)) {
163 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
164 RemoveFile(path);
165 } else {
166 FAIL();
167 }
168 }
169 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.
170 RemoveDir(dir);
171 }
172
116 } // namespace 173 } // namespace
117 174
118 using testing::_; 175 using testing::_;
119 176
120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented 177 // TODO(alessiob): Remove fixture once conversational_speech fully implemented
121 // and replace TEST_F with TEST. 178 // and replace TEST_F with TEST.
122 class ConversationalSpeechTest : public testing::Test { 179 class ConversationalSpeechTest : public testing::Test {
123 public: 180 public:
124 ConversationalSpeechTest() { 181 ConversationalSpeechTest() {
125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); 182 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
126 } 183 }
127 }; 184 };
128 185
129 TEST_F(ConversationalSpeechTest, Settings) { 186 TEST_F(ConversationalSpeechTest, Settings) {
130 const conversational_speech::Config config( 187 const conversational_speech::Config config(
131 audiotracks_path, timing_filepath, output_path); 188 audiotracks_path, timing_filepath, output_path);
132 189
133 // Test getters. 190 // Test getters.
134 EXPECT_EQ(audiotracks_path, config.audiotracks_path()); 191 EXPECT_EQ(audiotracks_path, config.audiotracks_path());
135 EXPECT_EQ(timing_filepath, config.timing_filepath()); 192 EXPECT_EQ(timing_filepath, config.timing_filepath());
136 EXPECT_EQ(output_path, config.output_path()); 193 EXPECT_EQ(output_path, config.output_path());
137 } 194 }
138 195
139 TEST_F(ConversationalSpeechTest, TimingSaveLoad) { 196 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {
140 // Save test timing. 197 // Save test timing.
141 const std::string temporary_filepath = webrtc::test::TempFilename( 198 const std::string temporary_filepath = TempFilename(
142 webrtc::test::OutputPath(), "TempTimingTestFile"); 199 OutputPath(), "TempTimingTestFile");
143 SaveTiming(temporary_filepath, expected_timing); 200 SaveTiming(temporary_filepath, expected_timing);
144 201
145 // Create a std::vector<Turn> instance by loading from file. 202 // Create a std::vector<Turn> instance by loading from file.
146 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); 203 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);
147 std::remove(temporary_filepath.c_str()); 204 std::remove(temporary_filepath.c_str());
148 205
149 // Check size. 206 // Check size.
150 EXPECT_EQ(expected_timing.size(), actual_timing.size()); 207 EXPECT_EQ(expected_timing.size(), actual_timing.size());
151 208
152 // Check Turn instances. 209 // Check Turn instances.
(...skipping 13 matching lines...) Expand all
166 conversational_speech::MultiEndCall multiend_call( 223 conversational_speech::MultiEndCall multiend_call(
167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 224 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
168 EXPECT_TRUE(multiend_call.valid()); 225 EXPECT_TRUE(multiend_call.valid());
169 226
170 // Test. 227 // Test.
171 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 228 EXPECT_EQ(2u, multiend_call.speaker_names().size());
172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 229 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
173 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); 230 EXPECT_EQ(6u, multiend_call.speaking_turns().size());
174 } 231 }
175 232
233 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
234 const std::vector<Turn> timing = {
235 {"A", "sr8000", 0},
236 {"B", "sr16000", 0},
237 };
238 auto mock_wavreader_factory = CreateMockWavReaderFactory();
239
240 // There are two unique audio tracks to read.
241 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
242
243 MultiEndCall multiend_call(
244 timing, audiotracks_path, std::move(mock_wavreader_factory));
245 EXPECT_FALSE(multiend_call.valid());
246 }
247
248 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
249 const std::vector<Turn> timing = {
250 {"A", "sr16000_stereo", 0},
251 {"B", "sr16000_stereo", 0},
252 };
253 auto mock_wavreader_factory = CreateMockWavReaderFactory();
254
255 // There is one unique audio track to read.
256 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
257
258 MultiEndCall multiend_call(
259 timing, audiotracks_path, std::move(mock_wavreader_factory));
260 EXPECT_FALSE(multiend_call.valid());
261 }
262
263 TEST_F(ConversationalSpeechTest,
264 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
265 const std::vector<Turn> timing = {
266 {"A", "sr8000", 0},
267 {"B", "sr16000_stereo", 0},
268 };
269 auto mock_wavreader_factory = CreateMockWavReaderFactory();
270
271 // There are two unique audio tracks to read.
272 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
273
274 MultiEndCall multiend_call(
275 timing, audiotracks_path, std::move(mock_wavreader_factory));
276 EXPECT_FALSE(multiend_call.valid());
277 }
278
176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { 279 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
177 const std::vector<Turn> timing = { 280 const std::vector<Turn> timing = {
178 {"A", "t500", -100}, 281 {"A", "t500", -100},
179 {"B", "t500", 0}, 282 {"B", "t500", 0},
180 }; 283 };
181 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 284 auto mock_wavreader_factory = CreateMockWavReaderFactory();
182 285
183 // There is one unique audio track to read. 286 // There is one unique audio track to read.
184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 287 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
185 288
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
518 621
519 for (int sample_rate : sample_rates) { 622 for (int sample_rate : sample_rates) {
520 const rtc::Pathname temp_filename( 623 const rtc::Pathname temp_filename(
521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) 624 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)
522 + ".wav"); 625 + ".wav");
523 626
524 // Write wav file. 627 // Write wav file.
525 const std::size_t num_samples = duration_seconds * sample_rate; 628 const std::size_t num_samples = duration_seconds * sample_rate;
526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; 629 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
527 CreateSineWavFile(temp_filename.pathname(), params); 630 CreateSineWavFile(temp_filename.pathname(), params);
528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("
529 << num_samples << " samples)";
530 631
531 // Load wav file and check if params match. 632 // Load wav file and check if params match.
532 WavReaderFactory wav_reader_factory; 633 WavReaderFactory wav_reader_factory;
533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname()); 634 MockWavReaderFactory::Params expeted_params = {
534 EXPECT_EQ(sample_rate, wav_reader->SampleRate()); 635 sample_rate, 1u, num_samples};
535 EXPECT_EQ(1u, wav_reader->NumChannels()); 636 CheckAudioTrackParams(
536 EXPECT_EQ(num_samples, wav_reader->NumSamples()); 637 wav_reader_factory, temp_filename.pathname(), expeted_params);
537 638
538 // Clean up. 639 // Clean up.
539 remove(temp_filename.pathname().c_str()); 640 remove(temp_filename.pathname().c_str());
540 } 641 }
541 } 642 }
542 643
644 TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
645 // Simulated call (one character corresponding to 500 ms):
646 // A 0*********...........2*********.....
647 // B ...........1*********.....3*********
648 const std::vector<Turn> expected_timing = {
649 {"A", "t5000_440.wav", 0},
650 {"B", "t5000_880.wav", 500},
651 {"A", "t5000_440.wav", 0},
652 {"B", "t5000_880.wav", -2500},
653 };
654 const std::size_t expected_duration_seconds = 18;
655
656 // Create temporary audio track files.
657 const int sample_rate = 16000;
658 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
659 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
660 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
661 };
662 const std::string audiotracks_path = CreateTemporarySineAudioTracks(
663 sine_tracks_params);
664
665 // Set up the multi-end call.
666 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(
667 new WavReaderFactory());
668 MultiEndCall multiend_call(
669 expected_timing, audiotracks_path, std::move(wavreader_factory));
670
671 // Simulate the call.
672 rtc::Pathname output_path(audiotracks_path);
673 output_path.AppendFolder("output");
674 CreateDir(output_path.pathname());
675 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();
676 auto generated_audiotrak_pairs = conversational_speech::Simulate(
677 multiend_call, output_path.pathname());
678 EXPECT_EQ(2u, generated_audiotrak_pairs->size());
679
680 // Check the output.
681 WavReaderFactory wav_reader_factory;
682 const MockWavReaderFactory::Params expeted_params = {
683 sample_rate, 1u, sample_rate * expected_duration_seconds};
684 for (const auto& it : *generated_audiotrak_pairs) {
685 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
686 CheckAudioTrackParams(
687 wav_reader_factory, it.second.near_end, expeted_params);
688 CheckAudioTrackParams(
689 wav_reader_factory, it.second.far_end, expeted_params);
690 }
691
692 // Clean.
693 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));
694 }
695
543 } // namespace test 696 } // namespace test
544 } // namespace webrtc 697 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698