Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2790933002: Conversational speech tool, simulator + unit tests (Closed)
Patch Set: map iterators simplified Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 24 matching lines...) Expand all
35 35
36 // MSVC++ requires this to be set before any other includes to get M_PI. 36 // MSVC++ requires this to be set before any other includes to get M_PI.
37 #define _USE_MATH_DEFINES 37 #define _USE_MATH_DEFINES
38 38
39 #include <stdio.h> 39 #include <stdio.h>
40 #include <cmath> 40 #include <cmath>
41 #include <map> 41 #include <map>
42 #include <memory> 42 #include <memory>
43 43
44 #include "webrtc/base/logging.h" 44 #include "webrtc/base/logging.h"
45 #include "webrtc/base/fileutils.h"
45 #include "webrtc/base/pathutils.h" 46 #include "webrtc/base/pathutils.h"
46 #include "webrtc/common_audio/wav_file.h" 47 #include "webrtc/common_audio/wav_file.h"
47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h" 48 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"
48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" 49 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 50 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
51 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"
50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" 52 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"
51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h" 53 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"
52 #include "webrtc/test/gmock.h" 54 #include "webrtc/test/gmock.h"
53 #include "webrtc/test/gtest.h" 55 #include "webrtc/test/gtest.h"
54 #include "webrtc/test/testsupport/fileutils.h" 56 #include "webrtc/test/testsupport/fileutils.h"
55 57
56 namespace webrtc { 58 namespace webrtc {
57 namespace test { 59 namespace test {
58 namespace { 60 namespace {
59 61
(...skipping 16 matching lines...) Expand all
76 {"A", "a3", 0}, 78 {"A", "a3", 0},
77 {"A", "a3", 0}, 79 {"A", "a3", 0},
78 }; 80 };
79 const std::size_t kNumberOfTurns = expected_timing.size(); 81 const std::size_t kNumberOfTurns = expected_timing.size();
80 82
81 // Default arguments for MockWavReaderFactory ctor. 83 // Default arguments for MockWavReaderFactory ctor.
82 // Fake audio track parameters. 84 // Fake audio track parameters.
83 constexpr int kDefaultSampleRate = 48000; 85 constexpr int kDefaultSampleRate = 48000;
84 const std::map<std::string, const MockWavReaderFactory::Params> 86 const std::map<std::string, const MockWavReaderFactory::Params>
85 kDefaultMockWavReaderFactoryParamsMap = { 87 kDefaultMockWavReaderFactoryParamsMap = {
86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. 88 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.
87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. 89 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.
88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. 90 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.
91 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.
92 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.
93 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.
89 }; 94 };
90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = 95 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
91 kDefaultMockWavReaderFactoryParamsMap.at("t500"); 96 kDefaultMockWavReaderFactoryParamsMap.at("t500");
92 97
93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() { 98 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
94 return std::unique_ptr<MockWavReaderFactory>( 99 return std::unique_ptr<MockWavReaderFactory>(
95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams, 100 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
96 kDefaultMockWavReaderFactoryParamsMap)); 101 kDefaultMockWavReaderFactoryParamsMap));
97 } 102 }
98 103
99 void CreateSineWavFile(const std::string& filepath, 104 void CreateSineWavFile(const std::string& filepath,
100 const MockWavReaderFactory::Params& params, 105 const MockWavReaderFactory::Params& params,
101 float frequency = 440.0f) { 106 float frequency = 440.0f) {
102 // Create samples. 107 // Create samples.
103 constexpr double two_pi = 2.0 * M_PI; 108 constexpr double two_pi = 2.0 * M_PI;
104 std::vector<int16_t> samples(params.num_samples); 109 std::vector<int16_t> samples(params.num_samples);
105 for (std::size_t i = 0; i < params.num_samples; ++i) { 110 for (std::size_t i = 0; i < params.num_samples; ++i) {
106 // TODO(alessiob): the produced tone is not pure, improve. 111 // TODO(alessiob): the produced tone is not pure, improve.
107 samples[i] = std::lround(32767.0f * std::sin( 112 samples[i] = std::lround(32767.0f * std::sin(
108 two_pi * i * frequency / params.sample_rate)); 113 two_pi * i * frequency / params.sample_rate));
109 } 114 }
110 115
111 // Write samples. 116 // Write samples.
112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels); 117 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
113 wav_writer.WriteSamples(samples.data(), params.num_samples); 118 wav_writer.WriteSamples(samples.data(), params.num_samples);
114 } 119 }
115 120
121 // Parameters to generate audio tracks with CreateSineWavFile.
122 struct SineAudioTrackParams {
123 MockWavReaderFactory::Params params;
124 float frequency;
125 };
126
127 // Creates a temporary directory in which sine audio tracks are written.
128 std::string CreateTemporarySineAudioTracks(
129 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
130 // Create temporary directory.
131 rtc::Pathname temp_directory(OutputPath());
132 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");
133 webrtc::test::CreateDir(temp_directory.pathname());
134
135 // Create sine tracks.
136 for (const auto& it : sine_tracks_params) {
137 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);
138 CreateSineWavFile(
139 temp_filepath.pathname(), it.second.params, it.second.frequency);
140 }
141
142 return temp_directory.pathname();
143 }
144
145 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
146 const std::string& filepath,
147 const MockWavReaderFactory::Params& expeted_params) {
148 auto wav_reader = wav_reader_factory.Create(filepath);
149 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());
150 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());
151 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());
152 }
153
116 } // namespace 154 } // namespace
117 155
118 using testing::_; 156 using testing::_;
119 157
120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented 158 // TODO(alessiob): Remove fixture once conversational_speech fully implemented
121 // and replace TEST_F with TEST. 159 // and replace TEST_F with TEST.
122 class ConversationalSpeechTest : public testing::Test { 160 class ConversationalSpeechTest : public testing::Test {
123 public: 161 public:
124 ConversationalSpeechTest() { 162 ConversationalSpeechTest() {
125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); 163 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
166 conversational_speech::MultiEndCall multiend_call( 204 conversational_speech::MultiEndCall multiend_call(
167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); 205 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));
168 EXPECT_TRUE(multiend_call.valid()); 206 EXPECT_TRUE(multiend_call.valid());
169 207
170 // Test. 208 // Test.
171 EXPECT_EQ(2u, multiend_call.speaker_names().size()); 209 EXPECT_EQ(2u, multiend_call.speaker_names().size());
172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); 210 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());
173 EXPECT_EQ(6u, multiend_call.speaking_turns().size()); 211 EXPECT_EQ(6u, multiend_call.speaking_turns().size());
174 } 212 }
175 213
214 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {
215 const std::vector<Turn> timing = {
216 {"A", "sr8000", 0},
217 {"B", "sr16000", 0},
218 };
219 auto mock_wavreader_factory = CreateMockWavReaderFactory();
220
221 // There are two unique audio tracks to read.
222 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
223
224 MultiEndCall multiend_call(
225 timing, audiotracks_path, std::move(mock_wavreader_factory));
226 EXPECT_FALSE(multiend_call.valid());
227 }
228
229 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {
230 const std::vector<Turn> timing = {
231 {"A", "sr16000_stereo", 0},
232 {"B", "sr16000_stereo", 0},
233 };
234 auto mock_wavreader_factory = CreateMockWavReaderFactory();
235
236 // There is one unique audio track to read.
237 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);
238
239 MultiEndCall multiend_call(
240 timing, audiotracks_path, std::move(mock_wavreader_factory));
241 EXPECT_FALSE(multiend_call.valid());
242 }
243
244 TEST_F(ConversationalSpeechTest,
245 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {
246 const std::vector<Turn> timing = {
247 {"A", "sr8000", 0},
248 {"B", "sr16000_stereo", 0},
249 };
250 auto mock_wavreader_factory = CreateMockWavReaderFactory();
251
252 // There are two unique audio tracks to read.
253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);
254
255 MultiEndCall multiend_call(
256 timing, audiotracks_path, std::move(mock_wavreader_factory));
257 EXPECT_FALSE(multiend_call.valid());
258 }
259
176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { 260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {
177 const std::vector<Turn> timing = { 261 const std::vector<Turn> timing = {
178 {"A", "t500", -100}, 262 {"A", "t500", -100},
179 {"B", "t500", 0}, 263 {"B", "t500", 0},
180 }; 264 };
181 auto mock_wavreader_factory = CreateMockWavReaderFactory(); 265 auto mock_wavreader_factory = CreateMockWavReaderFactory();
182 266
183 // There is one unique audio track to read. 267 // There is one unique audio track to read.
184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); 268 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
185 269
(...skipping 332 matching lines...) Expand 10 before | Expand all | Expand 10 after
518 602
519 for (int sample_rate : sample_rates) { 603 for (int sample_rate : sample_rates) {
520 const rtc::Pathname temp_filename( 604 const rtc::Pathname temp_filename(
521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) 605 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)
522 + ".wav"); 606 + ".wav");
523 607
524 // Write wav file. 608 // Write wav file.
525 const std::size_t num_samples = duration_seconds * sample_rate; 609 const std::size_t num_samples = duration_seconds * sample_rate;
526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; 610 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
527 CreateSineWavFile(temp_filename.pathname(), params); 611 CreateSineWavFile(temp_filename.pathname(), params);
528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("
529 << num_samples << " samples)";
530 612
531 // Load wav file and check if params match. 613 // Load wav file and check if params match.
532 WavReaderFactory wav_reader_factory; 614 WavReaderFactory wav_reader_factory;
533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname()); 615 MockWavReaderFactory::Params expeted_params = {
534 EXPECT_EQ(sample_rate, wav_reader->SampleRate()); 616 sample_rate, 1u, num_samples};
535 EXPECT_EQ(1u, wav_reader->NumChannels()); 617 CheckAudioTrackParams(
536 EXPECT_EQ(num_samples, wav_reader->NumSamples()); 618 wav_reader_factory, temp_filename.pathname(), expeted_params);
537 619
538 // Clean up. 620 // Clean up.
539 remove(temp_filename.pathname().c_str()); 621 remove(temp_filename.pathname().c_str());
540 } 622 }
541 } 623 }
542 624
625 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) {
626 // Simulated call (one character corresponding to 500 ms):
627 // A 0*********...........2*********.....
628 // B ...........1*********.....3*********
629 const std::vector<Turn> expected_timing = {
630 {"A", "t5000_440.wav", 0},
631 {"B", "t5000_880.wav", 500},
632 {"A", "t5000_440.wav", 0},
633 {"B", "t5000_880.wav", -2500},
634 };
635 const std::size_t expected_duration_seconds = 18;
636
637 // Create temporary audio track files.
638 const int sample_rate = 16000;
639 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
640 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
641 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
642 };
643 const std::string audiotracks_path = CreateTemporarySineAudioTracks(
644 sine_tracks_params);
645
646 // Set up the multi-end call.
647 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(
648 new WavReaderFactory());
649 MultiEndCall multiend_call(
650 expected_timing, audiotracks_path, std::move(wavreader_factory));
651
652 // Simulate the call.
653 rtc::Pathname output_path(audiotracks_path);
654 output_path.AppendFolder("output");
655 webrtc::test::CreateDir(output_path.pathname());
656 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();
657 auto generated_audiotrak_pairs = conversational_speech::Simulate(
658 multiend_call, output_path.pathname());
659 EXPECT_EQ(2u, generated_audiotrak_pairs->size());
660
661 // Check the output.
662 WavReaderFactory wav_reader_factory;
663 const MockWavReaderFactory::Params expeted_params = {
664 sample_rate, 1u, sample_rate * expected_duration_seconds};
665 for (const auto& it : *generated_audiotrak_pairs) {
666 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
667 CheckAudioTrackParams(
668 wav_reader_factory, it.second.near_end, expeted_params);
669 CheckAudioTrackParams(
670 wav_reader_factory, it.second.far_end, expeted_params);
671 }
672
673 // Clean.
674 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents(
675 rtc::Pathname(audiotracks_path)))
676 << "Cannot delete temporary data directory " << audiotracks_path;
677 }
678
543 } // namespace test 679 } // namespace test
544 } // namespace webrtc 680 } // namespace webrtc
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698