webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc - Issue 2790933002: Conversational speech tool, simulator + unit tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2790933002: Conversational speech tool, simulator + unit tests (Closed)

Patch Set: map iterators simplified Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn ('k') | webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h » ('j') | webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 24 matching lines...) Expand all Loading...
35	35

36 // MSVC++ requires this to be set before any other includes to get M_PI.	36 // MSVC++ requires this to be set before any other includes to get M_PI.

37 #define _USE_MATH_DEFINES	37 #define _USE_MATH_DEFINES

38	38

39 #include <stdio.h>	39 #include <stdio.h>

40 #include <cmath>	40 #include <cmath>

41 #include <map>	41 #include <map>

42 #include <memory>	42 #include <memory>

43	43

44 #include "webrtc/base/logging.h"	44 #include "webrtc/base/logging.h"

	45 #include "webrtc/base/fileutils.h"

45 #include "webrtc/base/pathutils.h"	46 #include "webrtc/base/pathutils.h"

46 #include "webrtc/common_audio/wav_file.h"	47 #include "webrtc/common_audio/wav_file.h"

47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"	48 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"

48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"	49 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"

49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	50 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

	51 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"

50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"	52 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"

51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"	53 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"

52 #include "webrtc/test/gmock.h"	54 #include "webrtc/test/gmock.h"

53 #include "webrtc/test/gtest.h"	55 #include "webrtc/test/gtest.h"

54 #include "webrtc/test/testsupport/fileutils.h"	56 #include "webrtc/test/testsupport/fileutils.h"

55	57

56 namespace webrtc {	58 namespace webrtc {

57 namespace test {	59 namespace test {

58 namespace {	60 namespace {

59	61

(...skipping 16 matching lines...) Expand all Loading...
76 {"A", "a3", 0},	78 {"A", "a3", 0},

77 {"A", "a3", 0},	79 {"A", "a3", 0},

78 };	80 };

79 const std::size_t kNumberOfTurns = expected_timing.size();	81 const std::size_t kNumberOfTurns = expected_timing.size();

80	82

81 // Default arguments for MockWavReaderFactory ctor.	83 // Default arguments for MockWavReaderFactory ctor.

82 // Fake audio track parameters.	84 // Fake audio track parameters.

83 constexpr int kDefaultSampleRate = 48000;	85 constexpr int kDefaultSampleRate = 48000;

84 const std::map<std::string, const MockWavReaderFactory::Params>	86 const std::map<std::string, const MockWavReaderFactory::Params>

85 kDefaultMockWavReaderFactoryParamsMap = {	87 kDefaultMockWavReaderFactoryParamsMap = {

86 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds.	88 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.

87 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds.	89 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.

88 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds.	90 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.

	91 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.

	92 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.

	93 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.

89 };	94 };

90 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =	95 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =

91 kDefaultMockWavReaderFactoryParamsMap.at("t500");	96 kDefaultMockWavReaderFactoryParamsMap.at("t500");

92	97

93 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {	98 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {

94 return std::unique_ptr<MockWavReaderFactory>(	99 return std::unique_ptr<MockWavReaderFactory>(

95 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,	100 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

96 kDefaultMockWavReaderFactoryParamsMap));	101 kDefaultMockWavReaderFactoryParamsMap));

97 }	102 }

98	103

99 void CreateSineWavFile(const std::string& filepath,	104 void CreateSineWavFile(const std::string& filepath,

100 const MockWavReaderFactory::Params& params,	105 const MockWavReaderFactory::Params& params,

101 float frequency = 440.0f) {	106 float frequency = 440.0f) {

102 // Create samples.	107 // Create samples.

103 constexpr double two_pi = 2.0 * M_PI;	108 constexpr double two_pi = 2.0 * M_PI;

104 std::vector<int16_t> samples(params.num_samples);	109 std::vector<int16_t> samples(params.num_samples);

105 for (std::size_t i = 0; i < params.num_samples; ++i) {	110 for (std::size_t i = 0; i < params.num_samples; ++i) {

106 // TODO(alessiob): the produced tone is not pure, improve.	111 // TODO(alessiob): the produced tone is not pure, improve.

107 samples[i] = std::lround(32767.0f * std::sin(	112 samples[i] = std::lround(32767.0f * std::sin(

108 two_pi * i * frequency / params.sample_rate));	113 two_pi * i * frequency / params.sample_rate));

109 }	114 }

110	115

111 // Write samples.	116 // Write samples.

112 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);	117 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);

113 wav_writer.WriteSamples(samples.data(), params.num_samples);	118 wav_writer.WriteSamples(samples.data(), params.num_samples);

114 }	119 }

115	120

	121 // Parameters to generate audio tracks with CreateSineWavFile.

	122 struct SineAudioTrackParams {

	123 MockWavReaderFactory::Params params;

	124 float frequency;

	125 };

	126

	127 // Creates a temporary directory in which sine audio tracks are written.

	128 std::string CreateTemporarySineAudioTracks(

	129 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {

	130 // Create temporary directory.

	131 rtc::Pathname temp_directory(OutputPath());

	132 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");

	133 webrtc::test::CreateDir(temp_directory.pathname());

	134

	135 // Create sine tracks.

	136 for (const auto& it : sine_tracks_params) {

	137 const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);

	138 CreateSineWavFile(

	139 temp_filepath.pathname(), it.second.params, it.second.frequency);

	140 }

	141

	142 return temp_directory.pathname();

	143 }

	144

	145 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,

	146 const std::string& filepath,

	147 const MockWavReaderFactory::Params& expeted_params) {

	148 auto wav_reader = wav_reader_factory.Create(filepath);

	149 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());

	150 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());

	151 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());

	152 }

	153

116 } // namespace	154 } // namespace

117	155

118 using testing::_;	156 using testing::_;

119	157

120 // TODO(alessiob): Remove fixture once conversational_speech fully implemented	158 // TODO(alessiob): Remove fixture once conversational_speech fully implemented

121 // and replace TEST_F with TEST.	159 // and replace TEST_F with TEST.

122 class ConversationalSpeechTest : public testing::Test {	160 class ConversationalSpeechTest : public testing::Test {

123 public:	161 public:

124 ConversationalSpeechTest() {	162 ConversationalSpeechTest() {

125 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);	163 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
166 conversational_speech::MultiEndCall multiend_call(	204 conversational_speech::MultiEndCall multiend_call(

167 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));	205 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));

168 EXPECT_TRUE(multiend_call.valid());	206 EXPECT_TRUE(multiend_call.valid());

169	207

170 // Test.	208 // Test.

171 EXPECT_EQ(2u, multiend_call.speaker_names().size());	209 EXPECT_EQ(2u, multiend_call.speaker_names().size());

172 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());	210 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());

173 EXPECT_EQ(6u, multiend_call.speaking_turns().size());	211 EXPECT_EQ(6u, multiend_call.speaking_turns().size());

174 }	212 }

175	213

	214 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {

	215 const std::vector<Turn> timing = {

	216 {"A", "sr8000", 0},

	217 {"B", "sr16000", 0},

	218 };

	219 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	220

	221 // There are two unique audio tracks to read.

	222 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	223

	224 MultiEndCall multiend_call(

	225 timing, audiotracks_path, std::move(mock_wavreader_factory));

	226 EXPECT_FALSE(multiend_call.valid());

	227 }

	228

	229 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {

	230 const std::vector<Turn> timing = {

	231 {"A", "sr16000_stereo", 0},

	232 {"B", "sr16000_stereo", 0},

	233 };

	234 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	235

	236 // There is one unique audio track to read.

	237 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	238

	239 MultiEndCall multiend_call(

	240 timing, audiotracks_path, std::move(mock_wavreader_factory));

	241 EXPECT_FALSE(multiend_call.valid());

	242 }

	243

	244 TEST_F(ConversationalSpeechTest,

	245 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {

	246 const std::vector<Turn> timing = {

	247 {"A", "sr8000", 0},

	248 {"B", "sr16000_stereo", 0},

	249 };

	250 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	251

	252 // There are two unique audio tracks to read.

	253 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	254

	255 MultiEndCall multiend_call(

	256 timing, audiotracks_path, std::move(mock_wavreader_factory));

	257 EXPECT_FALSE(multiend_call.valid());

	258 }

	259

176 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {	260 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) {

177 const std::vector<Turn> timing = {	261 const std::vector<Turn> timing = {

178 {"A", "t500", -100},	262 {"A", "t500", -100},

179 {"B", "t500", 0},	263 {"B", "t500", 0},

180 };	264 };

181 auto mock_wavreader_factory = CreateMockWavReaderFactory();	265 auto mock_wavreader_factory = CreateMockWavReaderFactory();

182	266

183 // There is one unique audio track to read.	267 // There is one unique audio track to read.

184 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);	268 EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);

185	269

(...skipping 332 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
518	602

519 for (int sample_rate : sample_rates) {	603 for (int sample_rate : sample_rates) {

520 const rtc::Pathname temp_filename(	604 const rtc::Pathname temp_filename(

521 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)	605 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)

522 + ".wav");	606 + ".wav");

523	607

524 // Write wav file.	608 // Write wav file.

525 const std::size_t num_samples = duration_seconds * sample_rate;	609 const std::size_t num_samples = duration_seconds * sample_rate;

526 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};	610 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};

527 CreateSineWavFile(temp_filename.pathname(), params);	611 CreateSineWavFile(temp_filename.pathname(), params);

528 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("

529 << num_samples << " samples)";

530	612

531 // Load wav file and check if params match.	613 // Load wav file and check if params match.

532 WavReaderFactory wav_reader_factory;	614 WavReaderFactory wav_reader_factory;

533 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname());	615 MockWavReaderFactory::Params expeted_params = {

534 EXPECT_EQ(sample_rate, wav_reader->SampleRate());	616 sample_rate, 1u, num_samples};

535 EXPECT_EQ(1u, wav_reader->NumChannels());	617 CheckAudioTrackParams(

536 EXPECT_EQ(num_samples, wav_reader->NumSamples());	618 wav_reader_factory, temp_filename.pathname(), expeted_params);

537	619

538 // Clean up.	620 // Clean up.

539 remove(temp_filename.pathname().c_str());	621 remove(temp_filename.pathname().c_str());

540 }	622 }

541 }	623 }

542	624

	625 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) {

	626 // Simulated call (one character corresponding to 500 ms):

	627 // A 0*******...........2*******.....

	628 // B ...........1*******.....3*******

	629 const std::vector<Turn> expected_timing = {

	630 {"A", "t5000_440.wav", 0},

	631 {"B", "t5000_880.wav", 500},

	632 {"A", "t5000_440.wav", 0},

	633 {"B", "t5000_880.wav", -2500},

	634 };

	635 const std::size_t expected_duration_seconds = 18;

	636

	637 // Create temporary audio track files.

	638 const int sample_rate = 16000;

	639 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {

	640 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},

	641 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},

	642 };

	643 const std::string audiotracks_path = CreateTemporarySineAudioTracks(

	644 sine_tracks_params);

	645

	646 // Set up the multi-end call.

	647 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(

	648 new WavReaderFactory());

	649 MultiEndCall multiend_call(

	650 expected_timing, audiotracks_path, std::move(wavreader_factory));

	651

	652 // Simulate the call.

	653 rtc::Pathname output_path(audiotracks_path);

	654 output_path.AppendFolder("output");

	655 webrtc::test::CreateDir(output_path.pathname());

	656 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();

	657 auto generated_audiotrak_pairs = conversational_speech::Simulate(

	658 multiend_call, output_path.pathname());

	659 EXPECT_EQ(2u, generated_audiotrak_pairs->size());

	660

	661 // Check the output.

	662 WavReaderFactory wav_reader_factory;

	663 const MockWavReaderFactory::Params expeted_params = {

	664 sample_rate, 1u, sample_rate * expected_duration_seconds};

	665 for (const auto& it : *generated_audiotrak_pairs) {

	666 LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";

	667 CheckAudioTrackParams(

	668 wav_reader_factory, it.second.near_end, expeted_params);

	669 CheckAudioTrackParams(

	670 wav_reader_factory, it.second.far_end, expeted_params);

	671 }

	672

	673 // Clean.

	674 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents(

	675 rtc::Pathname(audiotracks_path)))

	676 << "Cannot delete temporary data directory " << audiotracks_path;

	677 }

	678

543 } // namespace test	679 } // namespace test

544 } // namespace webrtc	680 } // namespace webrtc

OLD	NEW