webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc - Issue 2925123003: Revert of Conversational speech tool, simulator + unit tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2925123003: Revert of Conversational speech tool, simulator + unit tests (Closed)

Patch Set: Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn ('k') | webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 22 matching lines...) Expand all Loading...
33 // cases in which there are wrong offsets leading to self cross-talk (which is	33 // cases in which there are wrong offsets leading to self cross-talk (which is

34 // rejected).	34 // rejected).

35	35

36 // MSVC++ requires this to be set before any other includes to get M_PI.	36 // MSVC++ requires this to be set before any other includes to get M_PI.

37 #define _USE_MATH_DEFINES	37 #define _USE_MATH_DEFINES

38	38

39 #include <stdio.h>	39 #include <stdio.h>

40 #include <cmath>	40 #include <cmath>

41 #include <map>	41 #include <map>

42 #include <memory>	42 #include <memory>

43 #include <vector>

44	43

45 #include "webrtc/base/logging.h"	44 #include "webrtc/base/logging.h"

46 #include "webrtc/base/optional.h"

47 #include "webrtc/base/pathutils.h"	45 #include "webrtc/base/pathutils.h"

48 #include "webrtc/common_audio/wav_file.h"	46 #include "webrtc/common_audio/wav_file.h"

49 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"	47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"

50 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"	48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"

51 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

52 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"

53 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"	50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"

54 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"	51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"

55 #include "webrtc/test/gmock.h"	52 #include "webrtc/test/gmock.h"

56 #include "webrtc/test/gtest.h"	53 #include "webrtc/test/gtest.h"

57 #include "webrtc/test/testsupport/fileutils.h"	54 #include "webrtc/test/testsupport/fileutils.h"

58	55

59 namespace webrtc {	56 namespace webrtc {

60 namespace test {	57 namespace test {

61 namespace {	58 namespace {

62	59

(...skipping 16 matching lines...) Expand all Loading...
79 {"A", "a3", 0},	76 {"A", "a3", 0},

80 {"A", "a3", 0},	77 {"A", "a3", 0},

81 };	78 };

82 const std::size_t kNumberOfTurns = expected_timing.size();	79 const std::size_t kNumberOfTurns = expected_timing.size();

83	80

84 // Default arguments for MockWavReaderFactory ctor.	81 // Default arguments for MockWavReaderFactory ctor.

85 // Fake audio track parameters.	82 // Fake audio track parameters.

86 constexpr int kDefaultSampleRate = 48000;	83 constexpr int kDefaultSampleRate = 48000;

87 const std::map<std::string, const MockWavReaderFactory::Params>	84 const std::map<std::string, const MockWavReaderFactory::Params>

88 kDefaultMockWavReaderFactoryParamsMap = {	85 kDefaultMockWavReaderFactoryParamsMap = {

89 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.	86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds.

90 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.	87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds.

91 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.	88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds.

92 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.

93 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.

94 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.

95 };	89 };

96 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =	90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =

97 kDefaultMockWavReaderFactoryParamsMap.at("t500");	91 kDefaultMockWavReaderFactoryParamsMap.at("t500");

98	92

99 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {	93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {

100 return std::unique_ptr<MockWavReaderFactory>(	94 return std::unique_ptr<MockWavReaderFactory>(

101 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,	95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

102 kDefaultMockWavReaderFactoryParamsMap));	96 kDefaultMockWavReaderFactoryParamsMap));

103 }	97 }

104	98

105 void CreateSineWavFile(const std::string& filepath,	99 void CreateSineWavFile(const std::string& filepath,

106 const MockWavReaderFactory::Params& params,	100 const MockWavReaderFactory::Params& params,

107 float frequency = 440.0f) {	101 float frequency = 440.0f) {

108 // Create samples.	102 // Create samples.

109 constexpr double two_pi = 2.0 * M_PI;	103 constexpr double two_pi = 2.0 * M_PI;

110 std::vector<int16_t> samples(params.num_samples);	104 std::vector<int16_t> samples(params.num_samples);

111 for (std::size_t i = 0; i < params.num_samples; ++i) {	105 for (std::size_t i = 0; i < params.num_samples; ++i) {

112 // TODO(alessiob): the produced tone is not pure, improve.	106 // TODO(alessiob): the produced tone is not pure, improve.

113 samples[i] = std::lround(32767.0f * std::sin(	107 samples[i] = std::lround(32767.0f * std::sin(

114 two_pi * i * frequency / params.sample_rate));	108 two_pi * i * frequency / params.sample_rate));

115 }	109 }

116	110

117 // Write samples.	111 // Write samples.

118 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);	112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);

119 wav_writer.WriteSamples(samples.data(), params.num_samples);	113 wav_writer.WriteSamples(samples.data(), params.num_samples);

120 }	114 }

121	115

122 // Parameters to generate audio tracks with CreateSineWavFile.

123 struct SineAudioTrackParams {

124 MockWavReaderFactory::Params params;

125 float frequency;

126 };

127

128 // Creates a temporary directory in which sine audio tracks are written.

129 std::string CreateTemporarySineAudioTracks(

130 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {

131 // Create temporary directory.

132 rtc::Pathname temp_directory(OutputPath());

133 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");

134 CreateDir(temp_directory.pathname());

135

136 // Create sine tracks.

137 for (const auto& it : sine_tracks_params) {

138 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);

139 CreateSineWavFile(

140 temp_filepath.pathname(), it.second.params, it.second.frequency);

141 }

142

143 return temp_directory.pathname();

144 }

145

146 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,

147 const std::string& filepath,

148 const MockWavReaderFactory::Params& expeted_params) {

149 auto wav_reader = wav_reader_factory.Create(filepath);

150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());

151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());

152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());

153 }

154

155 void DeleteFolderAndContents(const std::string& dir) {

156 if (!DirExists(dir)) { return; }

157 rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir);

158 EXPECT_TRUE(dir_content);

159 for (const auto& path : *dir_content) {

160 if (DirExists(path)) {

161 DeleteFolderAndContents(path);

162 } else if (FileExists(path)) {

163 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.

164 RemoveFile(path);

165 } else {

166 FAIL();

167 }

168 }

169 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.

170 RemoveDir(dir);

171 }

172

173 } // namespace	116 } // namespace

174	117

175 using testing::_;	118 using testing::_;

176	119

177 // TODO(alessiob): Remove fixture once conversational_speech fully implemented	120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented

178 // and replace TEST_F with TEST.	121 // and replace TEST_F with TEST.

179 class ConversationalSpeechTest : public testing::Test {	122 class ConversationalSpeechTest : public testing::Test {

180 public:	123 public:

181 ConversationalSpeechTest() {	124 ConversationalSpeechTest() {

182 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);	125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

183 }	126 }

184 };	127 };

185	128

186 TEST_F(ConversationalSpeechTest, Settings) {	129 TEST_F(ConversationalSpeechTest, Settings) {

187 const conversational_speech::Config config(	130 const conversational_speech::Config config(

188 audiotracks_path, timing_filepath, output_path);	131 audiotracks_path, timing_filepath, output_path);

189	132

190 // Test getters.	133 // Test getters.

191 EXPECT_EQ(audiotracks_path, config.audiotracks_path());	134 EXPECT_EQ(audiotracks_path, config.audiotracks_path());

192 EXPECT_EQ(timing_filepath, config.timing_filepath());	135 EXPECT_EQ(timing_filepath, config.timing_filepath());

193 EXPECT_EQ(output_path, config.output_path());	136 EXPECT_EQ(output_path, config.output_path());

194 }	137 }

195	138

196 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {	139 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {

197 // Save test timing.	140 // Save test timing.

198 const std::string temporary_filepath = TempFilename(	141 const std::string temporary_filepath = webrtc::test::TempFilename(

199 OutputPath(), "TempTimingTestFile");	142 webrtc::test::OutputPath(), "TempTimingTestFile");

200 SaveTiming(temporary_filepath, expected_timing);	143 SaveTiming(temporary_filepath, expected_timing);

201	144

202 // Create a std::vector<Turn> instance by loading from file.	145 // Create a std::vector<Turn> instance by loading from file.

203 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);	146 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);

204 std::remove(temporary_filepath.c_str());	147 std::remove(temporary_filepath.c_str());

205	148

206 // Check size.	149 // Check size.

207 EXPECT_EQ(expected_timing.size(), actual_timing.size());	150 EXPECT_EQ(expected_timing.size(), actual_timing.size());

208	151

209 // Check Turn instances.	152 // Check Turn instances.

(...skipping 13 matching lines...) Expand all Loading...
223 conversational_speech::MultiEndCall multiend_call(	166 conversational_speech::MultiEndCall multiend_call(

224 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));	167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));

225 EXPECT_TRUE(multiend_call.valid());	168 EXPECT_TRUE(multiend_call.valid());

226	169

227 // Test.	170 // Test.

228 EXPECT_EQ(2u, multiend_call.speaker_names().size());	171 EXPECT_EQ(2u, multiend_call.speaker_names().size());

229 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());	172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());

230 EXPECT_EQ(6u, multiend_call.speaking_turns().size());	173 EXPECT_EQ(6u, multiend_call.speaking_turns().size());

231 }	174 }

232	175

233 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {

234 const std::vector<Turn> timing = {

235 {"A", "sr8000", 0},

236 {"B", "sr16000", 0},

237 };

238 auto mock_wavreader_factory = CreateMockWavReaderFactory();

239

240 // There are two unique audio tracks to read.

241 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

242

243 MultiEndCall multiend_call(

244 timing, audiotracks_path, std::move(mock_wavreader_factory));

245 EXPECT_FALSE(multiend_call.valid());

246 }

247

248 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {

249 const std::vector<Turn> timing = {

250 {"A", "sr16000_stereo", 0},

251 {"B", "sr16000_stereo", 0},

252 };

253 auto mock_wavreader_factory = CreateMockWavReaderFactory();

254

255 // There is one unique audio track to read.

256 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

257

258 MultiEndCall multiend_call(

259 timing, audiotracks_path, std::move(mock_wavreader_factory));

260 EXPECT_FALSE(multiend_call.valid());

261 }

262

263 TEST_F(ConversationalSpeechTest,

264 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {

265 const std::vector<Turn> timing = {

266 {"A", "sr8000", 0},

267 {"B", "sr16000_stereo", 0},

268 };

269 auto mock_wavreader_factory = CreateMockWavReaderFactory();

270

271 // There are two unique audio tracks to read.

272 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

273

274 MultiEndCall multiend_call(

275 timing, audiotracks_path, std::move(mock_wavreader_factory));

276 EXPECT_FALSE(multiend_call.valid());

277 }

278

279 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {	176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {

280 const std::vector<Turn> timing = {	177 const std::vector<Turn> timing = {

281 {"A", "t500", -100},	178 {"A", "t500", -100},

282 {"B", "t500", 0},	179 {"B", "t500", 0},

283 };	180 };

284 auto mock_wavreader_factory = CreateMockWavReaderFactory();	181 auto mock_wavreader_factory = CreateMockWavReaderFactory();

285	182

286 // There is one unique audio track to read.	183 // There is one unique audio track to read.

287 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);	184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);

288	185

(...skipping 332 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
621	518

622 for (int sample_rate : sample_rates) {	519 for (int sample_rate : sample_rates) {

623 const rtc::Pathname temp_filename(	520 const rtc::Pathname temp_filename(

624 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)	521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)

625 + ".wav");	522 + ".wav");

626	523

627 // Write wav file.	524 // Write wav file.

628 const std::size_t num_samples = duration_seconds * sample_rate;	525 const std::size_t num_samples = duration_seconds * sample_rate;

629 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};	526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};

630 CreateSineWavFile(temp_filename.pathname(), params);	527 CreateSineWavFile(temp_filename.pathname(), params);

	528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("

	529 << num_samples << " samples)";

631	530

632 // Load wav file and check if params match.	531 // Load wav file and check if params match.

633 WavReaderFactory wav_reader_factory;	532 WavReaderFactory wav_reader_factory;

634 MockWavReaderFactory::Params expeted_params = {	533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname());

635 sample_rate, 1u, num_samples};	534 EXPECT_EQ(sample_rate, wav_reader->SampleRate());

636 CheckAudioTrackParams(	535 EXPECT_EQ(1u, wav_reader->NumChannels());

637 wav_reader_factory, temp_filename.pathname(), expeted_params);	536 EXPECT_EQ(num_samples, wav_reader->NumSamples());

638	537

639 // Clean up.	538 // Clean up.

640 remove(temp_filename.pathname().c_str());	539 remove(temp_filename.pathname().c_str());

641 }	540 }

642 }	541 }

643	542

644 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) {

645 // Simulated call (one character corresponding to 500 ms):

646 // A 0*******...........2*******.....

647 // B ...........1*******.....3*******

648 const std::vector<Turn> expected_timing = {

649 {"A", "t5000_440.wav", 0},

650 {"B", "t5000_880.wav", 500},

651 {"A", "t5000_440.wav", 0},

652 {"B", "t5000_880.wav", -2500},

653 };

654 const std::size_t expected_duration_seconds = 18;

655

656 // Create temporary audio track files.

657 const int sample_rate = 16000;

658 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {

659 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},

660 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},

661 };

662 const std::string audiotracks_path = CreateTemporarySineAudioTracks(

663 sine_tracks_params);

664

665 // Set up the multi-end call.

666 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(

667 new WavReaderFactory());

668 MultiEndCall multiend_call(

669 expected_timing, audiotracks_path, std::move(wavreader_factory));

670

671 // Simulate the call.

672 rtc::Pathname output_path(audiotracks_path);

673 output_path.AppendFolder("output");

674 CreateDir(output_path.pathname());

675 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();

676 auto generated_audiotrak_pairs = conversational_speech::Simulate(

677 multiend_call, output_path.pathname());

678 EXPECT_EQ(2u, generated_audiotrak_pairs->size());

679

680 // Check the output.

681 WavReaderFactory wav_reader_factory;

682 const MockWavReaderFactory::Params expeted_params = {

683 sample_rate, 1u, sample_rate * expected_duration_seconds};

684 for (const auto& it : *generated_audiotrak_pairs) {

685 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";

686 CheckAudioTrackParams(

687 wav_reader_factory, it.second.near_end, expeted_params);

688 CheckAudioTrackParams(

689 wav_reader_factory, it.second.far_end, expeted_params);

690 }

691

692 // Clean.

693 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));

694 }

695

696 } // namespace test	543 } // namespace test

697 } // namespace webrtc	544 } // namespace webrtc

OLD	NEW