webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc - Issue 2930853002: Reland of Conversational speech tool, simulator + unit tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2930853002: Reland of Conversational speech tool, simulator + unit tests (Closed)

Patch Set: merge Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn ('k') | webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 22 matching lines...) Expand all Loading...
33 // cases in which there are wrong offsets leading to self cross-talk (which is	33 // cases in which there are wrong offsets leading to self cross-talk (which is

34 // rejected).	34 // rejected).

35	35

36 // MSVC++ requires this to be set before any other includes to get M_PI.	36 // MSVC++ requires this to be set before any other includes to get M_PI.

37 #define _USE_MATH_DEFINES	37 #define _USE_MATH_DEFINES

38	38

39 #include <stdio.h>	39 #include <stdio.h>

40 #include <cmath>	40 #include <cmath>

41 #include <map>	41 #include <map>

42 #include <memory>	42 #include <memory>

	43 #include <vector>

43	44

44 #include "webrtc/base/logging.h"	45 #include "webrtc/base/logging.h"

	46 #include "webrtc/base/optional.h"

45 #include "webrtc/base/pathutils.h"	47 #include "webrtc/base/pathutils.h"

46 #include "webrtc/common_audio/wav_file.h"	48 #include "webrtc/common_audio/wav_file.h"

47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"	49 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"

48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"	50 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"

49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	51 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

	52 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"

50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"	53 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"

51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"	54 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"

52 #include "webrtc/test/gmock.h"	55 #include "webrtc/test/gmock.h"

53 #include "webrtc/test/gtest.h"	56 #include "webrtc/test/gtest.h"

54 #include "webrtc/test/testsupport/fileutils.h"	57 #include "webrtc/test/testsupport/fileutils.h"

55	58

56 namespace webrtc {	59 namespace webrtc {

57 namespace test {	60 namespace test {

58 namespace {	61 namespace {

59	62

(...skipping 16 matching lines...) Expand all Loading...
76 {"A", "a3", 0},	79 {"A", "a3", 0},

77 {"A", "a3", 0},	80 {"A", "a3", 0},

78 };	81 };

79 const std::size_t kNumberOfTurns = expected_timing.size();	82 const std::size_t kNumberOfTurns = expected_timing.size();

80	83

81 // Default arguments for MockWavReaderFactory ctor.	84 // Default arguments for MockWavReaderFactory ctor.

82 // Fake audio track parameters.	85 // Fake audio track parameters.

83 constexpr int kDefaultSampleRate = 48000;	86 constexpr int kDefaultSampleRate = 48000;

84 const std::map<std::string, const MockWavReaderFactory::Params>	87 const std::map<std::string, const MockWavReaderFactory::Params>

85 kDefaultMockWavReaderFactoryParamsMap = {	88 kDefaultMockWavReaderFactoryParamsMap = {

86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds.	89 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.

87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds.	90 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.

88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds.	91 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.

	92 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.

	93 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.

	94 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.

89 };	95 };

90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =	96 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =

91 kDefaultMockWavReaderFactoryParamsMap.at("t500");	97 kDefaultMockWavReaderFactoryParamsMap.at("t500");

92	98

93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {	99 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {

94 return std::unique_ptr<MockWavReaderFactory>(	100 return std::unique_ptr<MockWavReaderFactory>(

95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,	101 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

96 kDefaultMockWavReaderFactoryParamsMap));	102 kDefaultMockWavReaderFactoryParamsMap));

97 }	103 }

98	104

99 void CreateSineWavFile(const std::string& filepath,	105 void CreateSineWavFile(const std::string& filepath,

100 const MockWavReaderFactory::Params& params,	106 const MockWavReaderFactory::Params& params,

101 float frequency = 440.0f) {	107 float frequency = 440.0f) {

102 // Create samples.	108 // Create samples.

103 constexpr double two_pi = 2.0 * M_PI;	109 constexpr double two_pi = 2.0 * M_PI;

104 std::vector<int16_t> samples(params.num_samples);	110 std::vector<int16_t> samples(params.num_samples);

105 for (std::size_t i = 0; i < params.num_samples; ++i) {	111 for (std::size_t i = 0; i < params.num_samples; ++i) {

106 // TODO(alessiob): the produced tone is not pure, improve.	112 // TODO(alessiob): the produced tone is not pure, improve.

107 samples[i] = std::lround(32767.0f * std::sin(	113 samples[i] = std::lround(32767.0f * std::sin(

108 two_pi * i * frequency / params.sample_rate));	114 two_pi * i * frequency / params.sample_rate));

109 }	115 }

110	116

111 // Write samples.	117 // Write samples.

112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);	118 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);

113 wav_writer.WriteSamples(samples.data(), params.num_samples);	119 wav_writer.WriteSamples(samples.data(), params.num_samples);

114 }	120 }

115	121

	122 // Parameters to generate audio tracks with CreateSineWavFile.

	123 struct SineAudioTrackParams {

	124 MockWavReaderFactory::Params params;

	125 float frequency;

	126 };

	127

	128 // Creates a temporary directory in which sine audio tracks are written.

	129 std::string CreateTemporarySineAudioTracks(

	130 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {

	131 // Create temporary directory.

	132 rtc::Pathname temp_directory(OutputPath());

	133 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");

	134 CreateDir(temp_directory.pathname());

	135

	136 // Create sine tracks.

	137 for (const auto& it : sine_tracks_params) {

	138 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);

	139 CreateSineWavFile(

	140 temp_filepath.pathname(), it.second.params, it.second.frequency);

	141 }

	142

	143 return temp_directory.pathname();

	144 }

	145

	146 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,

	147 const std::string& filepath,

	148 const MockWavReaderFactory::Params& expeted_params) {

	149 auto wav_reader = wav_reader_factory.Create(filepath);

	150 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());

	151 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());

	152 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());

	153 }

	154

	155 void DeleteFolderAndContents(const std::string& dir) {

	156 if (!DirExists(dir)) { return; }

	157 rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir);

	158 EXPECT_TRUE(dir_content);

	159 for (const auto& path : *dir_content) {

	160 if (DirExists(path)) {

	161 DeleteFolderAndContents(path);

	162 } else if (FileExists(path)) {

	163 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.

	164 RemoveFile(path);

	165 } else {

	166 FAIL();

	167 }

	168 }

	169 // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed.

	170 RemoveDir(dir);

	171 }

	172

116 } // namespace	173 } // namespace

117	174

118 using testing::_;	175 using testing::_;

119	176

120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented	177 // TODO(alessiob): Remove fixture once conversational_speech fully implemented

121 // and replace TEST_F with TEST.	178 // and replace TEST_F with TEST.

122 class ConversationalSpeechTest : public testing::Test {	179 class ConversationalSpeechTest : public testing::Test {

123 public:	180 public:

124 ConversationalSpeechTest() {	181 ConversationalSpeechTest() {

125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);	182 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

126 }	183 }

127 };	184 };

128	185

129 TEST_F(ConversationalSpeechTest, Settings) {	186 TEST_F(ConversationalSpeechTest, Settings) {

130 const conversational_speech::Config config(	187 const conversational_speech::Config config(

131 audiotracks_path, timing_filepath, output_path);	188 audiotracks_path, timing_filepath, output_path);

132	189

133 // Test getters.	190 // Test getters.

134 EXPECT_EQ(audiotracks_path, config.audiotracks_path());	191 EXPECT_EQ(audiotracks_path, config.audiotracks_path());

135 EXPECT_EQ(timing_filepath, config.timing_filepath());	192 EXPECT_EQ(timing_filepath, config.timing_filepath());

136 EXPECT_EQ(output_path, config.output_path());	193 EXPECT_EQ(output_path, config.output_path());

137 }	194 }

138	195

139 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {	196 TEST_F(ConversationalSpeechTest, TimingSaveLoad) {

140 // Save test timing.	197 // Save test timing.

141 const std::string temporary_filepath = webrtc::test::TempFilename(	198 const std::string temporary_filepath = TempFilename(

142 webrtc::test::OutputPath(), "TempTimingTestFile");	199 OutputPath(), "TempTimingTestFile");

143 SaveTiming(temporary_filepath, expected_timing);	200 SaveTiming(temporary_filepath, expected_timing);

144	201

145 // Create a std::vector<Turn> instance by loading from file.	202 // Create a std::vector<Turn> instance by loading from file.

146 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);	203 std::vector<Turn> actual_timing = LoadTiming(temporary_filepath);

147 std::remove(temporary_filepath.c_str());	204 std::remove(temporary_filepath.c_str());

148	205

149 // Check size.	206 // Check size.

150 EXPECT_EQ(expected_timing.size(), actual_timing.size());	207 EXPECT_EQ(expected_timing.size(), actual_timing.size());

151	208

152 // Check Turn instances.	209 // Check Turn instances.

(...skipping 13 matching lines...) Expand all Loading...
166 conversational_speech::MultiEndCall multiend_call(	223 conversational_speech::MultiEndCall multiend_call(

167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));	224 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));

168 EXPECT_TRUE(multiend_call.valid());	225 EXPECT_TRUE(multiend_call.valid());

169	226

170 // Test.	227 // Test.

171 EXPECT_EQ(2u, multiend_call.speaker_names().size());	228 EXPECT_EQ(2u, multiend_call.speaker_names().size());

172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());	229 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());

173 EXPECT_EQ(6u, multiend_call.speaking_turns().size());	230 EXPECT_EQ(6u, multiend_call.speaking_turns().size());

174 }	231 }

175	232

	233 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {

	234 const std::vector<Turn> timing = {

	235 {"A", "sr8000", 0},

	236 {"B", "sr16000", 0},

	237 };

	238 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	239

	240 // There are two unique audio tracks to read.

	241 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	242

	243 MultiEndCall multiend_call(

	244 timing, audiotracks_path, std::move(mock_wavreader_factory));

	245 EXPECT_FALSE(multiend_call.valid());

	246 }

	247

	248 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {

	249 const std::vector<Turn> timing = {

	250 {"A", "sr16000_stereo", 0},

	251 {"B", "sr16000_stereo", 0},

	252 };

	253 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	254

	255 // There is one unique audio track to read.

	256 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	257

	258 MultiEndCall multiend_call(

	259 timing, audiotracks_path, std::move(mock_wavreader_factory));

	260 EXPECT_FALSE(multiend_call.valid());

	261 }

	262

	263 TEST_F(ConversationalSpeechTest,

	264 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {

	265 const std::vector<Turn> timing = {

	266 {"A", "sr8000", 0},

	267 {"B", "sr16000_stereo", 0},

	268 };

	269 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	270

	271 // There are two unique audio tracks to read.

	272 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	273

	274 MultiEndCall multiend_call(

	275 timing, audiotracks_path, std::move(mock_wavreader_factory));

	276 EXPECT_FALSE(multiend_call.valid());

	277 }

	278

176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {	279 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {

177 const std::vector<Turn> timing = {	280 const std::vector<Turn> timing = {

178 {"A", "t500", -100},	281 {"A", "t500", -100},

179 {"B", "t500", 0},	282 {"B", "t500", 0},

180 };	283 };

181 auto mock_wavreader_factory = CreateMockWavReaderFactory();	284 auto mock_wavreader_factory = CreateMockWavReaderFactory();

182	285

183 // There is one unique audio track to read.	286 // There is one unique audio track to read.

184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);	287 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);

185	288

(...skipping 332 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
518	621

519 for (int sample_rate : sample_rates) {	622 for (int sample_rate : sample_rates) {

520 const rtc::Pathname temp_filename(	623 const rtc::Pathname temp_filename(

521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)	624 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)

522 + ".wav");	625 + ".wav");

523	626

524 // Write wav file.	627 // Write wav file.

525 const std::size_t num_samples = duration_seconds * sample_rate;	628 const std::size_t num_samples = duration_seconds * sample_rate;

526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};	629 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};

527 CreateSineWavFile(temp_filename.pathname(), params);	630 CreateSineWavFile(temp_filename.pathname(), params);

528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("

529 << num_samples << " samples)";

530	631

531 // Load wav file and check if params match.	632 // Load wav file and check if params match.

532 WavReaderFactory wav_reader_factory;	633 WavReaderFactory wav_reader_factory;

533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname());	634 MockWavReaderFactory::Params expeted_params = {

534 EXPECT_EQ(sample_rate, wav_reader->SampleRate());	635 sample_rate, 1u, num_samples};

535 EXPECT_EQ(1u, wav_reader->NumChannels());	636 CheckAudioTrackParams(

536 EXPECT_EQ(num_samples, wav_reader->NumSamples());	637 wav_reader_factory, temp_filename.pathname(), expeted_params);

537	638

538 // Clean up.	639 // Clean up.

539 remove(temp_filename.pathname().c_str());	640 remove(temp_filename.pathname().c_str());

540 }	641 }

541 }	642 }

542	643

	644 TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {

	645 // Simulated call (one character corresponding to 500 ms):

	646 // A 0*******...........2*******.....

	647 // B ...........1*******.....3*******

	648 const std::vector<Turn> expected_timing = {

	649 {"A", "t5000_440.wav", 0},

	650 {"B", "t5000_880.wav", 500},

	651 {"A", "t5000_440.wav", 0},

	652 {"B", "t5000_880.wav", -2500},

	653 };

	654 const std::size_t expected_duration_seconds = 18;

	655

	656 // Create temporary audio track files.

	657 const int sample_rate = 16000;

	658 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {

	659 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},

	660 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},

	661 };

	662 const std::string audiotracks_path = CreateTemporarySineAudioTracks(

	663 sine_tracks_params);

	664

	665 // Set up the multi-end call.

	666 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(

	667 new WavReaderFactory());

	668 MultiEndCall multiend_call(

	669 expected_timing, audiotracks_path, std::move(wavreader_factory));

	670

	671 // Simulate the call.

	672 rtc::Pathname output_path(audiotracks_path);

	673 output_path.AppendFolder("output");

	674 CreateDir(output_path.pathname());

	675 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();

	676 auto generated_audiotrak_pairs = conversational_speech::Simulate(

	677 multiend_call, output_path.pathname());

	678 EXPECT_EQ(2u, generated_audiotrak_pairs->size());

	679

	680 // Check the output.

	681 WavReaderFactory wav_reader_factory;

	682 const MockWavReaderFactory::Params expeted_params = {

	683 sample_rate, 1u, sample_rate * expected_duration_seconds};

	684 for (const auto& it : *generated_audiotrak_pairs) {

	685 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";

	686 CheckAudioTrackParams(

	687 wav_reader_factory, it.second.near_end, expeted_params);

	688 CheckAudioTrackParams(

	689 wav_reader_factory, it.second.far_end, expeted_params);

	690 }

	691

	692 // Clean.

	693 EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path));

	694 }

	695

543 } // namespace test	696 } // namespace test

544 } // namespace webrtc	697 } // namespace webrtc

OLD	NEW