Index: webrtc/modules/audio_processing/test/conversational_speech/simulator.cc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..705b1df84732a4646919e4e0976ed760194ed8e7
--- /dev/null
+++ b/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc
@@ -0,0 +1,221 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h"
+
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "webrtc/base/array_view.h"
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/base/pathutils.h"
+#include "webrtc/base/ptr_util.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::MultiEndCall;
+using conversational_speech::SpeakerOutputFilePaths;
+using conversational_speech::WavReaderInterface;
+
+// Combines the output path and speaker names to define the output file paths
+// for the near-end and far-end audio tracks.
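+// For instance (illustrative values, not taken from the tests), with
+// output_path "/tmp/out" and a speaker named "a", the expected paths are
+// "/tmp/out/s_a-near_end.wav" and "/tmp/out/s_a-far_end.wav".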
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
+    InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
+                               const std::string& output_path) {
+  // Create map.
+  auto speaker_output_file_paths_map = rtc::MakeUnique<
+      std::map<std::string, SpeakerOutputFilePaths>>();
+
+  // Add near-end and far-end output paths into the map.
+  for (const auto& speaker_name : speaker_names) {
+    const rtc::Pathname near_end_path(
+        output_path, "s_" + speaker_name + "-near_end.wav");
+    LOG(LS_VERBOSE) << "The near-end audio track will be created in "
+        << near_end_path.pathname() << ".";
+
+    const rtc::Pathname far_end_path(
+        output_path, "s_" + speaker_name + "-far_end.wav");
+    LOG(LS_VERBOSE) << "The far-end audio track will be created in "
+        << far_end_path.pathname() << ".";
+
+    // Add to map.
+    speaker_output_file_paths_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple(near_end_path.pathname(),
+                              far_end_path.pathname()));
+  }
+
+  return speaker_output_file_paths_map;
+}
+
+// Class that provides one WavWriter for the near-end and one for the far-end
+// output track of a speaker.
+class SpeakerWavWriters {
+ public:
+  SpeakerWavWriters(
+      const SpeakerOutputFilePaths& output_file_paths, int sample_rate)
+      : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
+        far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
+  WavWriter* near_end_wav_writer() {
+    return &near_end_wav_writer_;
+  }
+  WavWriter* far_end_wav_writer() {
+    return &far_end_wav_writer_;
+  }
+ private:
+  WavWriter near_end_wav_writer_;
+  WavWriter far_end_wav_writer_;
+};
+
+// Initializes a SpeakerWavWriters instance for each speaker, providing one
+// WavWriter for the near-end and one for the far-end output track.
+std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
+    InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
+        speaker_output_file_paths, int sample_rate) {
+  // Create map.
+  auto speaker_wav_writers_map = rtc::MakeUnique<
+      std::map<std::string, SpeakerWavWriters>>();
+
+  // Add a SpeakerWavWriters instance for each speaker into the map.
+  for (auto it = speaker_output_file_paths.begin();
+       it != speaker_output_file_paths.end(); ++it) {
+    speaker_wav_writers_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(it->first),
+        std::forward_as_tuple(it->second, sample_rate));
+  }
+
+  return speaker_wav_writers_map;
+}
+
+// Reads all the samples for each audio track.
+std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
+    const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+        audiotrack_readers) {
+  // Create map.
+  auto audiotracks_map = rtc::MakeUnique<
+      std::map<std::string, std::vector<int16_t>>>();
+
+  // Add audio track vectors.
+  for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
+       ++it) {
+    // Add map entry.
+    audiotracks_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(it->first),
+        std::forward_as_tuple(it->second->NumSamples()));
+
+    // Read samples.
+    it->second->ReadInt16Samples(audiotracks_map->at(it->first));
+  }
+
+  return audiotracks_map;
+}
+
+// Writes all the values in |source_samples| via |wav_writer|. If the number
+// of previously written samples in |wav_writer| is less than
+// |interval_begin|, it adds zeros as left padding. The padding corresponds
+// to intervals during which a speaker is not active.
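+// For example (illustrative numbers), if 100 samples have already been
+// written and |interval_begin| is 150, then 50 zeros are written before
+// |source_samples|.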
+void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
+                       size_t interval_begin, WavWriter* wav_writer) {
+  // Add left padding.
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
+  size_t padding_size = interval_begin - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+
+  // Write source samples.
+  wav_writer->WriteSamples(source_samples.data(), source_samples.size());
+}
+
+// Appends zeros via |wav_writer|. The number of zeros equals the difference
+// between |pad_samples| and the number of previously written samples, and it
+// is always non-negative.
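+// For example (illustrative numbers), if 950 samples have been written and
+// |pad_samples| is 1000, then 50 trailing zeros are appended.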
+void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
+  size_t padding_size = pad_samples - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+}
+
+}  // namespace
+
+namespace conversational_speech {
+
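+// Generates the near-end and far-end output tracks of each speaker: the
+// output file paths and WavWriters are initialized, the input audio tracks
+// are preloaded, each speaking turn is then written with left padding, and
+// finally every output track is right-padded up to the total call duration.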
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+    const MultiEndCall& multiend_call, const std::string& output_path) {
+  // Set output file paths and initialize wav writers.
+  const auto& speaker_names = multiend_call.speaker_names();
+  auto speaker_output_file_paths = InitSpeakerOutputFilePaths(
+      speaker_names, output_path);
+  auto speakers_wav_writers = InitSpeakersWavWriters(
+      *speaker_output_file_paths, multiend_call.sample_rate());
+
+  // Preload all the input audio tracks.
+  const auto& audiotrack_readers = multiend_call.audiotrack_readers();
+  auto audiotracks = PreloadAudioTracks(audiotrack_readers);
+
+  // TODO(alessiob): When speaker_names.size() == 2, the near-end and far-end
+  // tracks across the 2 speakers are symmetric; hence, the code below could
+  // only create the near-end or the far-end. However, this would require
+  // splitting the unit tests and documenting the behavior in README.md. In
+  // practice, it should not be an issue since the files are not expected to
+  // be significant in size.
+
+  // Write near-end and far-end output tracks.
+  for (const auto& speaking_turn : multiend_call.speaking_turns()) {
+    const std::string& active_speaker_name = speaking_turn.speaker_name;
+    const auto& source_audiotrack = audiotracks->at(
+        speaking_turn.audiotrack_file_name);
+
+    // Write the active speaker's chunk to the active speaker's near-end.
+    PadLeftWriteChunk(source_audiotrack, speaking_turn.begin,
+                      speakers_wav_writers->at(
+                          active_speaker_name).near_end_wav_writer());
+
+    // Write the active speaker's chunk to the other participants' far-ends.
+    for (const std::string& speaker_name : speaker_names) {
+      if (speaker_name == active_speaker_name)
+        continue;
+      PadLeftWriteChunk(source_audiotrack, speaking_turn.begin,
+                        speakers_wav_writers->at(
+                            speaker_name).far_end_wav_writer());
+    }
+  }
+
+  // Finalize all the output tracks with right padding.
+  // This is required to make the duration of all the output tracks equal.
+  size_t duration_samples = multiend_call.total_duration_samples();
+  for (const std::string& speaker_name : speaker_names) {
+    PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
+                  duration_samples);
+    PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
+                  duration_samples);
+  }
+
+  return speaker_output_file_paths;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc