webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc - Issue 2790933002: Conversational speech tool, simulator + unit tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc

Issue 2790933002: Conversational speech tool, simulator + unit tests (Closed)

Patch Set: Using AppendFolder() to build paths to folders Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn ('k') | webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h » ('j') | webrtc/modules/audio_processing/test/conversational_speech/simulator.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 24 matching lines...) Expand all Loading...
35	35

36 // MSVC++ requires this to be set before any other includes to get M_PI.	36 // MSVC++ requires this to be set before any other includes to get M_PI.

37 #define _USE_MATH_DEFINES	37 #define _USE_MATH_DEFINES

38	38

39 #include <stdio.h>	39 #include <stdio.h>

40 #include <cmath>	40 #include <cmath>

41 #include <map>	41 #include <map>

42 #include <memory>	42 #include <memory>

43	43

44 #include "webrtc/base/logging.h"	44 #include "webrtc/base/logging.h"

	45 #include "webrtc/base/fileutils.h"

45 #include "webrtc/base/pathutils.h"	46 #include "webrtc/base/pathutils.h"

46 #include "webrtc/common_audio/wav_file.h"	47 #include "webrtc/common_audio/wav_file.h"

47 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"	48 #include "webrtc/modules/audio_processing/test/conversational_speech/config.h"

48 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"	49 #include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"

49 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	50 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

	51 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"

50 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"	52 #include "webrtc/modules/audio_processing/test/conversational_speech/timing.h"

51 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"	53 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h"

52 #include "webrtc/test/gmock.h"	54 #include "webrtc/test/gmock.h"

53 #include "webrtc/test/gtest.h"	55 #include "webrtc/test/gtest.h"

54 #include "webrtc/test/testsupport/fileutils.h"	56 #include "webrtc/test/testsupport/fileutils.h"

55	57

56 namespace webrtc {	58 namespace webrtc {

57 namespace test {	59 namespace test {

58 namespace {	60 namespace {

59	61

(...skipping 15 matching lines...) Expand all Loading...
75 {"B", "b2", -200},	77 {"B", "b2", -200},

76 {"A", "a3", 0},	78 {"A", "a3", 0},

77 {"A", "a3", 0},	79 {"A", "a3", 0},

78 };	80 };

79 const std::size_t kNumberOfTurns = expected_timing.size();	81 const std::size_t kNumberOfTurns = expected_timing.size();

80	82

81 // Fake audio track parameters.	83 // Fake audio track parameters.

82 const int kDefaultSampleRate = 48000;	84 const int kDefaultSampleRate = 48000;

83 const std::map<std::string, const MockWavReaderFactory::Params>	85 const std::map<std::string, const MockWavReaderFactory::Params>

84 kDefaultMockWavReaderFactoryParamsMap = {	86 kDefaultMockWavReaderFactoryParamsMap = {

85 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds.	87 {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 seconds.

86 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds.	88 {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 seconds.

87 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds.	89 {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 seconds.

	90 {"sr8000", {8000, 1u, 8000u}}, // 8kHz sample rate, mono, 1 second.

	91 {"sr16000", {16000, 1u, 16000u}}, // 16kHz sample rate, mono, 1 second.

	92 {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo.

88 };	93 };

89	94

90 // Default arguments for MockWavReaderFactory ctor.	95 // Default arguments for MockWavReaderFactory ctor.

91 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =	96 const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =

92 kDefaultMockWavReaderFactoryParamsMap.at("t500");	97 kDefaultMockWavReaderFactoryParamsMap.at("t500");

93	98

94 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {	99 std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {

95 return std::unique_ptr<MockWavReaderFactory>(	100 return std::unique_ptr<MockWavReaderFactory>(

96 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,	101 new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,

97 kDefaultMockWavReaderFactoryParamsMap));	102 kDefaultMockWavReaderFactoryParamsMap));

98 }	103 }

99	104

100 void CreateSineWavFile(const std::string& filepath,	105 void CreateSineWavFile(const std::string& filepath,

101 const MockWavReaderFactory::Params& params,	106 const MockWavReaderFactory::Params& params,

102 float frequency = 440.0f) {	107 float frequency = 440.0f) {

103 // Create samples.	108 // Create samples.

104 constexpr double two_pi = 2.0 * M_PI;	109 constexpr double two_pi = 2.0 * M_PI;

105 std::vector<int16_t> samples(params.num_samples);	110 std::vector<int16_t> samples(params.num_samples);

106 for (std::size_t i = 0; i < params.num_samples; ++i) {	111 for (std::size_t i = 0; i < params.num_samples; ++i) {

107 // TODO(alessiob): the produced tone is not pure, improve.	112 // TODO(alessiob): the produced tone is not pure, improve.

108 samples[i] = std::lround(32767.0f * std::sin(	113 samples[i] = std::lround(32767.0f * std::sin(

109 two_pi * i * frequency / params.sample_rate));	114 two_pi * i * frequency / params.sample_rate));

110 }	115 }

111	116

112 // Write samples.	117 // Write samples.

113 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);	118 WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);

114 wav_writer.WriteSamples(samples.data(), params.num_samples);	119 wav_writer.WriteSamples(samples.data(), params.num_samples);

115 }	120 }

116	121

	122 // Parameters to generate audio tracks with CreateSineWavFile.

	123 struct SineAudioTrackParams {

	124 MockWavReaderFactory::Params params;

	125 float frequency;

	126 };

	127

	128 // Creates a temporary directory in which sine audio tracks are written.

	129 std::string CreateTemporarySineAudioTracks(

	130 const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {

	131 // Create temporary directory.

	132 rtc::Pathname temp_directory(OutputPath());

	133 temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");

	134 webrtc::test::CreateDir(temp_directory.pathname());

	135

	136 // Create sine tracks.

	137 for (auto it = sine_tracks_params.begin(); it != sine_tracks_params.end();
	minyue-webrtc 2017/04/07 13:24:30: for (const auto& sine_tracks_param : sine_tracks_params) -- AleBzk 2017/04/10 08:24:48 (reply): Nope. I need both key and value from the map.
	138 ++it) {

	139 const rtc::Pathname temp_filepath(temp_directory.pathname(), it->first);

	140 CreateSineWavFile(

	141 temp_filepath.pathname(), it->second.params, it->second.frequency);

	142 }

	143

	144 return temp_directory.pathname();

	145 }

	146

	147 void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,

	148 const std::string& filepath,

	149 const MockWavReaderFactory::Params& expeted_params) {

	150 auto wav_reader = wav_reader_factory.Create(filepath);

	151 EXPECT_EQ(expeted_params.sample_rate, wav_reader->SampleRate());

	152 EXPECT_EQ(expeted_params.num_channels, wav_reader->NumChannels());

	153 EXPECT_EQ(expeted_params.num_samples, wav_reader->NumSamples());

	154 }

	155

117 } // namespace	156 } // namespace

118	157

119 class ConversationalSpeechTest : public testing::Test {	158 class ConversationalSpeechTest : public testing::Test {

120 public:	159 public:

121 ConversationalSpeechTest() {	160 ConversationalSpeechTest() {

122 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);	161 rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

123 }	162 }

124 };	163 };

125	164

126 TEST_F(ConversationalSpeechTest, Settings) {	165 TEST_F(ConversationalSpeechTest, Settings) {

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
163 conversational_speech::MultiEndCall multiend_call(	202 conversational_speech::MultiEndCall multiend_call(

164 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));	203 expected_timing, audiotracks_path, std::move(mock_wavreader_factory));

165 EXPECT_TRUE(multiend_call.valid());	204 EXPECT_TRUE(multiend_call.valid());

166	205

167 // Test.	206 // Test.

168 EXPECT_EQ(2u, multiend_call.speaker_names().size());	207 EXPECT_EQ(2u, multiend_call.speaker_names().size());

169 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());	208 EXPECT_EQ(5u, multiend_call.audiotrack_readers().size());

170 EXPECT_EQ(6u, multiend_call.speaking_turns().size());	209 EXPECT_EQ(6u, multiend_call.speaking_turns().size());

171 }	210 }

172	211

	212 TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) {

	213 const std::vector<Turn> timing = {

	214 {"A", "sr8000", 0},

	215 {"B", "sr16000", 0},

	216 };

	217 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	218

	219 // There are two unique audio tracks to read.

	220 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	221

	222 MultiEndCall multiend_call(

	223 timing, audiotracks_path, std::move(mock_wavreader_factory));

	224 EXPECT_FALSE(multiend_call.valid());

	225 }

	226

	227 TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) {

	228 const std::vector<Turn> timing = {

	229 {"A", "sr16000_stereo", 0},

	230 {"B", "sr16000_stereo", 0},

	231 };

	232 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	233

	234 // There is one unique audio track to read.

	235 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

	236

	237 MultiEndCall multiend_call(

	238 timing, audiotracks_path, std::move(mock_wavreader_factory));

	239 EXPECT_FALSE(multiend_call.valid());

	240 }

	241

	242 TEST_F(ConversationalSpeechTest,

	243 MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) {

	244 const std::vector<Turn> timing = {

	245 {"A", "sr8000", 0},

	246 {"B", "sr16000_stereo", 0},

	247 };

	248 auto mock_wavreader_factory = CreateMockWavReaderFactory();

	249

	250 // There are two unique audio tracks to read.

	251 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2);

	252

	253 MultiEndCall multiend_call(

	254 timing, audiotracks_path, std::move(mock_wavreader_factory));

	255 EXPECT_FALSE(multiend_call.valid());

	256 }

	257

173 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) {	258 TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNonNegative) {

174 const std::vector<Turn> timing = {	259 const std::vector<Turn> timing = {

175 {"A", "t500", -100},	260 {"A", "t500", -100},

176 {"B", "t500", 0},	261 {"B", "t500", 0},

177 };	262 };

178 auto mock_wavreader_factory = CreateMockWavReaderFactory();	263 auto mock_wavreader_factory = CreateMockWavReaderFactory();

179	264

180 // There is one unique audio track to read.	265 // There is one unique audio track to read.

181 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);	266 EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1);

182	267

(...skipping 332 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
515	600

516 for (int sample_rate : sample_rates) {	601 for (int sample_rate : sample_rates) {

517 const rtc::Pathname temp_filename(	602 const rtc::Pathname temp_filename(

518 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)	603 OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate)

519 + ".wav");	604 + ".wav");

520	605

521 // Write wav file.	606 // Write wav file.

522 const std::size_t num_samples = duration_seconds * sample_rate;	607 const std::size_t num_samples = duration_seconds * sample_rate;

523 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};	608 MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};

524 CreateSineWavFile(temp_filename.pathname(), params);	609 CreateSineWavFile(temp_filename.pathname(), params);

525 LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created ("

526 << num_samples << " samples)";

527	610

528 // Load wav file and check if params match.	611 // Load wav file and check if params match.

529 WavReaderFactory wav_reader_factory;	612 WavReaderFactory wav_reader_factory;

530 auto wav_reader = wav_reader_factory.Create(temp_filename.pathname());	613 MockWavReaderFactory::Params expeted_params = {

531 EXPECT_EQ(sample_rate, wav_reader->SampleRate());	614 sample_rate, 1u, num_samples};

532 EXPECT_EQ(1u, wav_reader->NumChannels());	615 CheckAudioTrackParams(

533 EXPECT_EQ(num_samples, wav_reader->NumSamples());	616 wav_reader_factory, temp_filename.pathname(), expeted_params);

534	617

535 // Clean up.	618 // Clean up.

536 remove(temp_filename.pathname().c_str());	619 remove(temp_filename.pathname().c_str());

537 }	620 }

538 }	621 }

539	622

	623 TEST_F(ConversationalSpeechTest, MultiEndCallSimulator) {

	624 // Simulated call (one character corresponding to 500 ms):

	625 // A 0*******...........2*******.....

	626 // B ...........1*******.....3*******

	627 const std::vector<Turn> expected_timing = {

	628 {"A", "t5000_440.wav", 0},

	629 {"B", "t5000_880.wav", 500},

	630 {"A", "t5000_440.wav", 0},

	631 {"B", "t5000_880.wav", -2500},

	632 };

	633 const std::size_t expected_duration_seconds = 18;

	634

	635 // Create temporary audio track files.

	636 const int sample_rate = 16000;

	637 const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {

	638 {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},

	639 {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},

	640 };

	641 const std::string audiotracks_path = CreateTemporarySineAudioTracks(

	642 sine_tracks_params);

	643

	644 // Set up the multi-end call.

	645 auto wavreader_factory = std::unique_ptr<WavReaderFactory>(

	646 new WavReaderFactory());

	647 MultiEndCall multiend_call(

	648 expected_timing, audiotracks_path, std::move(wavreader_factory));

	649

	650 // Simulate the call.

	651 rtc::Pathname output_path(audiotracks_path);

	652 output_path.AppendFolder("output");

	653 webrtc::test::CreateDir(output_path.pathname());

	654 LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();

	655 auto generated_audiotrak_pairs = conversational_speech::Simulate(

	656 multiend_call, output_path.pathname());

	657 EXPECT_EQ(2u, generated_audiotrak_pairs->size());

	658

	659 // Check the output.

	660 WavReaderFactory wav_reader_factory;

	661 const MockWavReaderFactory::Params expeted_params = {

	662 sample_rate, 1u, sample_rate * expected_duration_seconds};

	663 for (auto it = generated_audiotrak_pairs->begin();
	minyue-webrtc 2017/04/07 13:24:30: same here -- AleBzk 2017/04/10 08:24:48 (reply): Same here as well :)
	664 it != generated_audiotrak_pairs->end(); ++it) {

	665 LOG(LS_VERBOSE) << "checking far/near-end for <" << it->first << ">";

	666 CheckAudioTrackParams(

	667 wav_reader_factory, it->second.near_end, expeted_params);

	668 CheckAudioTrackParams(

	669 wav_reader_factory, it->second.far_end, expeted_params);

	670 }

	671

	672 // Clean.

	673 EXPECT_TRUE(rtc::Filesystem::DeleteFolderAndContents(

	674 rtc::Pathname(audiotracks_path)))

	675 << "Cannot delete temporary data directory " << audiotracks_path;

	676 }

	677

540 } // namespace test	678 } // namespace test

541 } // namespace webrtc	679 } // namespace webrtc

OLD	NEW