OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h
" |
| 12 |
| 13 #include <set> |
| 14 #include <utility> |
| 15 #include <vector> |
| 16 |
| 17 #include "webrtc/base/array_view.h" |
| 18 #include "webrtc/base/constructormagic.h" |
| 19 #include "webrtc/base/logging.h" |
| 20 #include "webrtc/base/pathutils.h" |
| 21 #include "webrtc/base/ptr_util.h" |
| 22 #include "webrtc/common_audio/wav_file.h" |
| 23 #include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_i
nterface.h" |
| 24 |
| 25 namespace webrtc { |
| 26 namespace test { |
| 27 namespace { |
| 28 |
| 29 using conversational_speech::MultiEndCall; |
| 30 using conversational_speech::SpeakerOutputFilePaths; |
| 31 using conversational_speech::WavReaderInterface; |
| 32 |
| 33 // Combines output path and speaker names to define the output file paths for |
| 34 // the near-end and far=end audio tracks. |
| 35 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> |
| 36 InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names, |
| 37 const std::string& output_path) { |
| 38 // Create map. |
| 39 auto speaker_output_file_paths_map = rtc::MakeUnique< |
| 40 std::map<std::string, SpeakerOutputFilePaths>>(); |
| 41 |
| 42 // Add near-end and far-end output paths into the map. |
| 43 for (const auto& speaker_name : speaker_names) { |
| 44 const rtc::Pathname near_end_path( |
| 45 output_path, "s_" + speaker_name + "-near_end.wav"); |
| 46 LOG(LS_VERBOSE) << "The near-end audio track will be created in " |
| 47 << near_end_path.pathname() << "."; |
| 48 |
| 49 const rtc::Pathname far_end_path( |
| 50 output_path, "s_" + speaker_name + "-far_end.wav"); |
| 51 LOG(LS_VERBOSE) << "The far-end audio track will be created in " |
| 52 << far_end_path.pathname() << "."; |
| 53 |
| 54 // Add to map. |
| 55 speaker_output_file_paths_map->emplace( |
| 56 std::piecewise_construct, |
| 57 std::forward_as_tuple(speaker_name), |
| 58 std::forward_as_tuple(near_end_path.pathname(), |
| 59 far_end_path.pathname())); |
| 60 } |
| 61 |
| 62 return speaker_output_file_paths_map; |
| 63 } |
| 64 |
| 65 // Class that provides one WavWriter for the near-end and one for the far-end |
| 66 // output track of a speaker. |
| 67 class SpeakerWavWriters { |
| 68 public: |
| 69 SpeakerWavWriters( |
| 70 const SpeakerOutputFilePaths& output_file_paths, int sample_rate) |
| 71 : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u), |
| 72 far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {} |
| 73 WavWriter* near_end_wav_writer() { |
| 74 return &near_end_wav_writer_; |
| 75 } |
| 76 WavWriter* far_end_wav_writer() { |
| 77 return &far_end_wav_writer_; |
| 78 } |
| 79 private: |
| 80 WavWriter near_end_wav_writer_; |
| 81 WavWriter far_end_wav_writer_; |
| 82 }; |
| 83 |
| 84 // Initializes one WavWriter instance for each speaker and both the near-end and |
| 85 // far-end output tracks. |
| 86 std::unique_ptr<std::map<std::string, SpeakerWavWriters>> |
| 87 InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>& |
| 88 speaker_output_file_paths, int sample_rate) { |
| 89 // Create map. |
| 90 auto speaker_wav_writers_map = rtc::MakeUnique< |
| 91 std::map<std::string, SpeakerWavWriters>>(); |
| 92 |
| 93 // Add SpeakerWavWriters instance into the map. |
| 94 for (auto it = speaker_output_file_paths.begin(); |
| 95 it != speaker_output_file_paths.end(); ++it) { |
| 96 speaker_wav_writers_map->emplace( |
| 97 std::piecewise_construct, |
| 98 std::forward_as_tuple(it->first), |
| 99 std::forward_as_tuple(it->second, sample_rate)); |
| 100 } |
| 101 |
| 102 return speaker_wav_writers_map; |
| 103 } |
| 104 |
| 105 // Reads all the samples for each audio track. |
| 106 std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks( |
| 107 const std::map<std::string, std::unique_ptr<WavReaderInterface>>& |
| 108 audiotrack_readers) { |
| 109 // Create map. |
| 110 auto audiotracks_map = rtc::MakeUnique< |
| 111 std::map<std::string, std::vector<int16_t>>>(); |
| 112 |
| 113 // Add audio track vectors. |
| 114 for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end(); |
| 115 ++it) { |
| 116 // Add map entry. |
| 117 audiotracks_map->emplace( |
| 118 std::piecewise_construct, |
| 119 std::forward_as_tuple(it->first), |
| 120 std::forward_as_tuple(it->second->NumSamples())); |
| 121 |
| 122 // Read samples. |
| 123 it->second->ReadInt16Samples(audiotracks_map->at(it->first)); |
| 124 } |
| 125 |
| 126 return audiotracks_map; |
| 127 } |
| 128 |
| 129 // Writes all the values in |source_samples| via |wav_writer|. If the number of |
| 130 // previously written samples in |wav_writer| is less than |interval_begin|, it |
| 131 // adds zeros as left padding. The padding corresponds to intervals during which |
| 132 // a speaker is not active. |
| 133 void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples, |
| 134 size_t interval_begin, WavWriter* wav_writer) { |
| 135 // Add left padding. |
| 136 RTC_CHECK(wav_writer); |
| 137 RTC_CHECK_GE(interval_begin, wav_writer->num_samples()); |
| 138 size_t padding_size = interval_begin - wav_writer->num_samples(); |
| 139 if (padding_size != 0) { |
| 140 const std::vector<int16_t> padding(padding_size, 0); |
| 141 wav_writer->WriteSamples(padding.data(), padding_size); |
| 142 } |
| 143 |
| 144 // Write source samples. |
| 145 wav_writer->WriteSamples(source_samples.data(), source_samples.size()); |
| 146 } |
| 147 |
| 148 // Appends zeros via |wav_writer|. The number of zeros is always non-negative |
| 149 // and equal to the difference between the previously written samples and |
| 150 // |pad_samples|. |
| 151 void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) { |
| 152 RTC_CHECK(wav_writer); |
| 153 RTC_CHECK_GE(pad_samples, wav_writer->num_samples()); |
| 154 size_t padding_size = pad_samples - wav_writer->num_samples(); |
| 155 if (padding_size != 0) { |
| 156 const std::vector<int16_t> padding(padding_size, 0); |
| 157 wav_writer->WriteSamples(padding.data(), padding_size); |
| 158 } |
| 159 } |
| 160 |
| 161 } // namespace |
| 162 |
| 163 namespace conversational_speech { |
| 164 |
| 165 std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate( |
| 166 const MultiEndCall& multiend_call, const std::string& output_path) { |
| 167 // Set output file paths and initialize wav writers. |
| 168 const auto& speaker_names = multiend_call.speaker_names(); |
| 169 auto speaker_output_file_paths = InitSpeakerOutputFilePaths( |
| 170 speaker_names, output_path); |
| 171 auto speakers_wav_writers = InitSpeakersWavWriters( |
| 172 *speaker_output_file_paths, multiend_call.sample_rate()); |
| 173 |
| 174 // Preload all the input audio tracks. |
| 175 const auto& audiotrack_readers = multiend_call.audiotrack_readers(); |
| 176 auto audiotracks = PreloadAudioTracks(audiotrack_readers); |
| 177 |
| 178 // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end |
| 179 // across the 2 speakers are symmetric; hence, the code below could be |
| 180 // replaced by only creating the near-end or the far-end. However, this would |
| 181 // require to split the unit tests and document the behavior in README.md. |
| 182 // In practice, it should not be an issue since the files are not expected to |
| 183 // be signinificant. |
| 184 |
| 185 // Write near-end and far-end output tracks. |
| 186 for (const auto& speaking_turn : multiend_call.speaking_turns()) { |
| 187 const std::string& active_speaker_name = speaking_turn.speaker_name; |
| 188 auto source_audiotrack = audiotracks->at( |
| 189 speaking_turn.audiotrack_file_name); |
| 190 |
| 191 // Write active speaker's chunk to active speaker's near-end. |
| 192 PadLeftWriteChunk(source_audiotrack, speaking_turn.begin, |
| 193 speakers_wav_writers->at( |
| 194 active_speaker_name).near_end_wav_writer()); |
| 195 |
| 196 // Write active speaker's chunk to other participants' far-ends. |
| 197 for (const std::string& speaker_name : speaker_names) { |
| 198 if (speaker_name == active_speaker_name) |
| 199 continue; |
| 200 PadLeftWriteChunk(source_audiotrack, speaking_turn.begin, |
| 201 speakers_wav_writers->at( |
| 202 speaker_name).far_end_wav_writer()); |
| 203 } |
| 204 } |
| 205 |
| 206 // Finalize all the output tracks with right padding. |
| 207 // This is required to make all the output tracks duration equal. |
| 208 size_t duration_samples = multiend_call.total_duration_samples(); |
| 209 for (const std::string& speaker_name : speaker_names) { |
| 210 PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(), |
| 211 duration_samples); |
| 212 PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(), |
| 213 duration_samples); |
| 214 } |
| 215 |
| 216 return speaker_output_file_paths; |
| 217 } |
| 218 |
| 219 } // namespace conversational_speech |
| 220 } // namespace test |
| 221 } // namespace webrtc |
OLD | NEW |