Index: webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
index 6797beb98305272e9aa19626317a049000b0dcf1..b112e8ff252e71fa076f39df58e7af4d04f9ea2d 100644 |
--- a/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
+++ b/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc |
@@ -40,13 +40,16 @@ |
#include <cmath> |
#include <map> |
#include <memory> |
+#include <vector> |
#include "webrtc/base/logging.h" |
+#include "webrtc/base/optional.h" |
#include "webrtc/base/pathutils.h" |
#include "webrtc/common_audio/wav_file.h" |
#include "webrtc/modules/audio_processing/test/conversational_speech/config.h" |
#include "webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" |
#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" |
+#include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h" |
#include "webrtc/modules/audio_processing/test/conversational_speech/timing.h" |
#include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h" |
#include "webrtc/test/gmock.h" |
@@ -83,9 +86,12 @@ const std::size_t kNumberOfTurns = expected_timing.size(); |
constexpr int kDefaultSampleRate = 48000; |
const std::map<std::string, const MockWavReaderFactory::Params> |
kDefaultMockWavReaderFactoryParamsMap = { |
- {"t300", {kDefaultSampleRate, 1u, 14400u}}, // 0.3 seconds. |
- {"t500", {kDefaultSampleRate, 1u, 24000u}}, // 0.5 seconds. |
- {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // 1.0 seconds. |
+ {"t300", {kDefaultSampleRate, 1u, 14400u}}, // Mono, 0.3 s at default rate. |
+ {"t500", {kDefaultSampleRate, 1u, 24000u}}, // Mono, 0.5 s at default rate. |
+ {"t1000", {kDefaultSampleRate, 1u, 48000u}}, // Mono, 1.0 s at default rate. |
+ {"sr8000", {8000, 1u, 8000u}}, // 8 kHz sample rate, mono, 1 second. |
+ {"sr16000", {16000, 1u, 16000u}}, // 16 kHz sample rate, mono, 1 second. |
+ {"sr16000_stereo", {16000, 2u, 16000u}}, // Like sr16000, but stereo. |
}; |
const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams = |
kDefaultMockWavReaderFactoryParamsMap.at("t500"); |
@@ -113,6 +119,57 @@ void CreateSineWavFile(const std::string& filepath, |
wav_writer.WriteSamples(samples.data(), params.num_samples); |
} |
+// Parameters of one sine audio track to be written with CreateSineWavFile. |
+struct SineAudioTrackParams { |
+ MockWavReaderFactory::Params params; // Sample rate, channels, num samples. |
+ float frequency; // Frequency of the sine wave (presumably in Hz). |
+}; |
+ |
+// Writes one sine WAV file per entry of |sine_tracks_params| (the map key is |
+// used as file name) into a folder created under OutputPath(), and returns |
+// the path of that folder. |
+std::string CreateTemporarySineAudioTracks( |
+    const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) { |
+  // Folder hosting the generated tracks. |
+  rtc::Pathname tracks_dir(OutputPath()); |
+  tracks_dir.AppendFolder("TempConversationalSpeechAudioTracks"); |
+  CreateDir(tracks_dir.pathname()); |
+ |
+  // Generate the tracks, one file per map entry. |
+  for (const auto& name_and_track : sine_tracks_params) { |
+    const SineAudioTrackParams& track = name_and_track.second; |
+    const rtc::Pathname track_filepath( |
+        tracks_dir.pathname(), name_and_track.first); |
+    CreateSineWavFile( |
+        track_filepath.pathname(), track.params, track.frequency); |
+  } |
+ |
+  return tracks_dir.pathname(); |
+} |
+ |
+// Opens |filepath| through |wav_reader_factory| and expects the sample rate, |
+// the number of channels and the number of samples reported by the reader to |
+// match |expected_params|. Mismatches are reported as non-fatal failures. |
+void CheckAudioTrackParams( |
+    const WavReaderFactory& wav_reader_factory, |
+    const std::string& filepath, |
+    const MockWavReaderFactory::Params& expected_params) { |
+  auto wav_reader = wav_reader_factory.Create(filepath); |
+  EXPECT_EQ(expected_params.sample_rate, wav_reader->SampleRate()); |
+  EXPECT_EQ(expected_params.num_channels, wav_reader->NumChannels()); |
+  EXPECT_EQ(expected_params.num_samples, wav_reader->NumSamples()); |
+} |
+ |
+// Recursively removes |dir| and everything it contains; does nothing if |dir| |
+// does not exist. Raises a fatal test failure if the directory cannot be |
+// read or if an entry is neither a file nor a directory, so callers may wrap |
+// the call in EXPECT_NO_FATAL_FAILURE(). |
+void DeleteFolderAndContents(const std::string& dir) { |
+  if (!DirExists(dir)) { |
+    return; |
+  } |
+  rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir); |
+  // Fatal on purpose: |dir_content| is dereferenced right below, which must |
+  // not happen when the directory listing is not available. |
+  ASSERT_TRUE(dir_content); |
+  for (const auto& path : *dir_content) { |
+    if (DirExists(path)) { |
+      DeleteFolderAndContents(path); |
+    } else if (FileExists(path)) { |
+      // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed. |
+      RemoveFile(path); |
+    } else { |
+      FAIL(); |
+    } |
+  } |
+  // TODO(alessiob): Wrap with EXPECT_TRUE() once webrtc:7769 bug fixed. |
+  RemoveDir(dir); |
+} |
+ |
} // namespace |
using testing::_; |
@@ -138,8 +195,8 @@ TEST_F(ConversationalSpeechTest, Settings) { |
TEST_F(ConversationalSpeechTest, TimingSaveLoad) { |
// Save test timing. |
- const std::string temporary_filepath = webrtc::test::TempFilename( |
- webrtc::test::OutputPath(), "TempTimingTestFile"); |
+ const std::string temporary_filepath = TempFilename( |
+ OutputPath(), "TempTimingTestFile"); |
SaveTiming(temporary_filepath, expected_timing); |
// Create a std::vector<Turn> instance by loading from file. |
@@ -173,6 +230,52 @@ TEST_F(ConversationalSpeechTest, MultiEndCallCreate) { |
EXPECT_EQ(6u, multiend_call.speaking_turns().size()); |
} |
+// A call whose audio tracks have different sample rates (8 kHz and 16 kHz in |
+// the mock parameters) is expected to be flagged as invalid. |
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { |
+  const std::vector<Turn> turns = { |
+      {"A", "sr8000", 0}, |
+      {"B", "sr16000", 0}, |
+  }; |
+  auto wavreader_factory = CreateMockWavReaderFactory(); |
+ |
+  // Two distinct audio tracks are referenced, hence two Create() calls. |
+  EXPECT_CALL(*wavreader_factory, Create(testing::_)).Times(2); |
+ |
+  MultiEndCall call(turns, audiotracks_path, std::move(wavreader_factory)); |
+  EXPECT_FALSE(call.valid()); |
+} |
+ |
+// A call using a stereo audio track (two channels in the mock parameters) is |
+// expected to be flagged as invalid. |
+TEST_F(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { |
+  const std::vector<Turn> turns = { |
+      {"A", "sr16000_stereo", 0}, |
+      {"B", "sr16000_stereo", 0}, |
+  }; |
+  auto wavreader_factory = CreateMockWavReaderFactory(); |
+ |
+  // Both turns use the same audio track, hence a single Create() call. |
+  EXPECT_CALL(*wavreader_factory, Create(testing::_)).Times(1); |
+ |
+  MultiEndCall call(turns, audiotracks_path, std::move(wavreader_factory)); |
+  EXPECT_FALSE(call.valid()); |
+} |
+ |
+// A call combining an 8 kHz mono track with a 16 kHz stereo track (see the |
+// mock parameters) is expected to be flagged as invalid. |
+TEST_F(ConversationalSpeechTest, |
+       MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { |
+  const std::vector<Turn> turns = { |
+      {"A", "sr8000", 0}, |
+      {"B", "sr16000_stereo", 0}, |
+  }; |
+  auto wavreader_factory = CreateMockWavReaderFactory(); |
+ |
+  // Two distinct audio tracks are referenced, hence two Create() calls. |
+  EXPECT_CALL(*wavreader_factory, Create(testing::_)).Times(2); |
+ |
+  MultiEndCall call(turns, audiotracks_path, std::move(wavreader_factory)); |
+  EXPECT_FALSE(call.valid()); |
+} |
+ |
TEST_F(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { |
const std::vector<Turn> timing = { |
{"A", "t500", -100}, |
@@ -525,20 +628,70 @@ TEST_F(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) { |
const std::size_t num_samples = duration_seconds * sample_rate; |
MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples}; |
CreateSineWavFile(temp_filename.pathname(), params); |
- LOG(LS_VERBOSE) << "wav file @" << sample_rate << " Hz created (" |
- << num_samples << " samples)"; |
// Load wav file and check if params match. |
WavReaderFactory wav_reader_factory; |
- auto wav_reader = wav_reader_factory.Create(temp_filename.pathname()); |
- EXPECT_EQ(sample_rate, wav_reader->SampleRate()); |
- EXPECT_EQ(1u, wav_reader->NumChannels()); |
- EXPECT_EQ(num_samples, wav_reader->NumSamples()); |
+ MockWavReaderFactory::Params expeted_params = { |
+ sample_rate, 1u, num_samples}; |
+ CheckAudioTrackParams( |
+ wav_reader_factory, temp_filename.pathname(), expeted_params); |
// Clean up. |
remove(temp_filename.pathname().c_str()); |
} |
} |
+// Simulates a two-speaker call from sine tracks written to disk and checks |
+// sample rate, number of channels and length of every generated near-end and |
+// far-end track. |
+TEST_F(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) { |
+  // Simulated call (one character corresponding to 500 ms): |
+  // A 0*********...........2*********..... |
+  // B ...........1*********.....3********* |
+  const std::vector<Turn> timing = { |
+      {"A", "t5000_440.wav", 0}, |
+      {"B", "t5000_880.wav", 500}, |
+      {"A", "t5000_440.wav", 0}, |
+      {"B", "t5000_880.wav", -2500}, |
+  }; |
+  const std::size_t expected_duration_seconds = 18; |
+ |
+  // Create temporary audio track files (mono, 5 seconds each). |
+  const int sample_rate = 16000; |
+  const std::map<std::string, SineAudioTrackParams> sine_tracks_params = { |
+      {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}}, |
+      {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}}, |
+  }; |
+  // Named differently from the |audiotracks_path| used by the other tests to |
+  // avoid shadowing it. |
+  const std::string temp_audiotracks_path = |
+      CreateTemporarySineAudioTracks(sine_tracks_params); |
+ |
+  // Set up the multi-end call. |
+  auto wavreader_factory = |
+      std::unique_ptr<WavReaderFactory>(new WavReaderFactory()); |
+  MultiEndCall multiend_call( |
+      timing, temp_audiotracks_path, std::move(wavreader_factory)); |
+ |
+  // Simulate the call. |
+  rtc::Pathname output_path(temp_audiotracks_path); |
+  output_path.AppendFolder("output"); |
+  CreateDir(output_path.pathname()); |
+  LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname(); |
+  auto generated_audiotrack_pairs = conversational_speech::Simulate( |
+      multiend_call, output_path.pathname()); |
+ |
+  // Check the output. The result is dereferenced only if it is set, so that |
+  // a missing result is reported as a failure and the clean-up still runs. |
+  EXPECT_TRUE(generated_audiotrack_pairs); |
+  if (generated_audiotrack_pairs) { |
+    EXPECT_EQ(2u, generated_audiotrack_pairs->size()); |
+    WavReaderFactory output_reader_factory; |
+    const MockWavReaderFactory::Params expected_params = { |
+        sample_rate, 1u, sample_rate * expected_duration_seconds}; |
+    for (const auto& it : *generated_audiotrack_pairs) { |
+      LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">"; |
+      CheckAudioTrackParams( |
+          output_reader_factory, it.second.near_end, expected_params); |
+      CheckAudioTrackParams( |
+          output_reader_factory, it.second.far_end, expected_params); |
+    } |
+  } |
+ |
+  // Clean. |
+  EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(temp_audiotracks_path)); |
+} |
+ |
} // namespace test |
} // namespace webrtc |