Chromium Code Reviews
| Index: webrtc/modules/audio_processing/test/conversational_speech/simulator.cc |
| diff --git a/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..bb8742df4022234ba05644659af3b4be94091308 |
| --- /dev/null |
| +++ b/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc |
| @@ -0,0 +1,221 @@ |
| +/* |
| + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| + |
| +#include "webrtc/modules/audio_processing/test/conversational_speech/simulator.h" |
| + |
| +#include <set> |
| +#include <utility> |
| +#include <vector> |
| + |
| +#include "webrtc/base/array_view.h" |
| +#include "webrtc/base/constructormagic.h" |
| +#include "webrtc/base/logging.h" |
| +#include "webrtc/base/pathutils.h" |
| +#include "webrtc/base/ptr_util.h" |
| +#include "webrtc/common_audio/wav_file.h" |
| +#include "webrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h" |
| + |
| +namespace webrtc { |
| +namespace test { |
| +namespace { |
| + |
| +using conversational_speech::MultiEndCall; |
| +using conversational_speech::SpeakerOutputFilePaths; |
| +using conversational_speech::WavReaderInterface; |
| + |
| +// Combines output path and speaker names to define the output file paths for |
| +// the near-end and far-end audio tracks. |
| +std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> |
|
minyue-webrtc
2017/04/07 13:24:30
I don't know the benefit of passing a unique_ptr o
AleBzk
2017/04/10 08:24:48
I'm not a move semantics expert, but I'd say that
|
| + InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names, |
| + const std::string& output_path) { |
| + // Create map. |
| + auto speaker_output_file_paths_map = rtc::MakeUnique< |
| + std::map<std::string, SpeakerOutputFilePaths>>(); |
| + |
| + // Add near-end and far-end output paths into the map. |
| + for (const auto& speaker_name : speaker_names) { |
| + const rtc::Pathname near_end_path( |
| + output_path, "s_" + speaker_name + "-near_end.wav"); |
| + LOG(LS_VERBOSE) << "creating " << near_end_path.pathname(); |
| + |
| + const rtc::Pathname far_end_path( |
| + output_path, "s_" + speaker_name + "-far_end.wav"); |
| + LOG(LS_VERBOSE) << "creating " << far_end_path.pathname(); |
| + |
| + // Add to map. |
| + speaker_output_file_paths_map->emplace( |
| + std::piecewise_construct, |
| + std::forward_as_tuple(speaker_name), |
| + std::forward_as_tuple(near_end_path.pathname(), |
| + far_end_path.pathname())); |
| + } |
| + |
| + return speaker_output_file_paths_map; |
| +} |
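Regarding the unique_ptr question in the thread above (the comments are truncated, so the exact suggestion is unclear): a return-by-value alternative would look roughly like the sketch below, relying on C++11 move semantics / return-value optimization instead of a heap-allocated map. This is only an illustration, not necessarily what was landed.

    std::map<std::string, SpeakerOutputFilePaths> InitSpeakerOutputFilePaths(
        const std::set<std::string>& speaker_names,
        const std::string& output_path) {
      std::map<std::string, SpeakerOutputFilePaths> paths;
      for (const auto& speaker_name : speaker_names) {
        const rtc::Pathname near_end_path(
            output_path, "s_" + speaker_name + "-near_end.wav");
        const rtc::Pathname far_end_path(
            output_path, "s_" + speaker_name + "-far_end.wav");
        // Construct the SpeakerOutputFilePaths entry in place.
        paths.emplace(std::piecewise_construct,
                      std::forward_as_tuple(speaker_name),
                      std::forward_as_tuple(near_end_path.pathname(),
                                            far_end_path.pathname()));
      }
      return paths;  // Moved (or elided), not copied.
    }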
| + |
| +// Class that provides one WavWriter for the near-end and one for the far-end |
| +// output track of a speaker. |
| +class SpeakerWavWriters { |
| + public: |
| + SpeakerWavWriters( |
| + const SpeakerOutputFilePaths& output_file_paths, int sample_rate) |
| + : near_end_(output_file_paths.near_end, sample_rate, 1u), |
| + far_end_(output_file_paths.far_end, sample_rate, 1u) {} |
| + WavWriter& near_end() { |
| + return near_end_; |
| + } |
| + WavWriter& far_end() { |
| + return far_end_; |
| + } |
| + private: |
| + WavWriter near_end_; |
| + WavWriter far_end_; |
| + |
| + // RTC_DISALLOW_COPY_AND_ASSIGN(SpeakerWavWriters); |
|
minyue-webrtc
2017/04/07 13:24:30
why commented out?
AleBzk
2017/04/10 08:24:48
yet another leftover. Removed.
|
| +}; |
| + |
| +// Initializes one SpeakerWavWriters instance per speaker, providing both the |
| +// near-end and far-end output tracks. |
| +std::unique_ptr<std::map<std::string, SpeakerWavWriters>> |
| + InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>& |
| + speaker_output_file_paths, int sample_rate) { |
| + // Create map. |
| + auto speaker_wav_writers_map = rtc::MakeUnique< |
| + std::map<std::string, SpeakerWavWriters>>(); |
| + |
| + // Add a SpeakerWavWriters instance to the map for each speaker. |
| + for (auto it = speaker_output_file_paths.begin(); |
| + it != speaker_output_file_paths.end(); ++it) { |
| + speaker_wav_writers_map->emplace( |
| + std::piecewise_construct, |
| + std::forward_as_tuple(it->first), |
| + std::forward_as_tuple(it->second, sample_rate)); |
| + } |
| + |
| + return speaker_wav_writers_map; |
| +} |
| + |
| +// Reads all the samples for each audio track. |
| +std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks( |
| + const std::map<std::string, std::unique_ptr<WavReaderInterface>>& |
| + audiotrack_readers) { |
| + // Create map. |
| + auto audiotracks_map = rtc::MakeUnique< |
| + std::map<std::string, std::vector<int16_t>>>(); |
| + |
| + // Add audio track vectors. |
| + for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end(); |
| + ++it) { |
| + // Add map entry. |
| + audiotracks_map->emplace( |
| + std::piecewise_construct, |
| + std::forward_as_tuple(it->first), |
| + std::forward_as_tuple(it->second->NumSamples())); |
| + |
| + // Read samples. |
| + it->second->ReadInt16Samples(audiotracks_map->at(it->first)); |
| + } |
| + |
| + return audiotracks_map; |
| +} |
| + |
| +// Writes all the values in source_samples via wav_writer. If the number of |
| +// previously written samples in wav_writer is less than interval.begin, it adds |
|
minyue-webrtc
2017/04/07 13:24:31
|interval_begin|, (1. underscore, 2. put || on var
AleBzk
2017/04/10 08:24:48
Done.
|
| +// zeros as left padding. The padding corresponds to intervals during which a |
| +// speaker is not active. |
| +void PadLeftWriteChunk(const std::vector<int16_t>& source_samples, |
|
minyue-webrtc
2017/04/07 13:24:30
consider using ArrayView instead of vector
AleBzk
2017/04/10 08:24:48
Done.
|
| + std::size_t interval_begin, WavWriter* wav_writer) { |
|
minyue-webrtc
2017/04/07 13:24:30
no need for std::
AleBzk
2017/04/10 08:24:48
Done.
|
| + // Add left padding. |
| + RTC_CHECK(wav_writer); |
| + std::size_t padding_size = interval_begin - wav_writer->num_samples(); |
| + RTC_CHECK(padding_size >= 0); |
|
minyue-webrtc
2017/04/07 13:24:30
size_t never is negative
RTC_CHECK_GE(interval_be
AleBzk
2017/04/10 08:24:48
Done.
|
| + if (padding_size > 0) { |
|
minyue-webrtc
2017/04/07 13:24:30
no need for "if" if you check before
AleBzk
2017/04/10 08:24:48
I still need it because padding_size can be 0. If
|
| + const std::vector<int16_t> padding(padding_size, 0); |
| + wav_writer->WriteSamples(padding.data(), padding_size); |
| + } |
| + |
| + // Write source samples. |
| + wav_writer->WriteSamples(source_samples.data(), source_samples.size()); |
| +} |
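Putting the suggestions from the threads above together (rtc::ArrayView instead of a const vector reference, plain size_t, and RTC_CHECK_GE before the subtraction, since a size_t difference can only wrap around rather than become negative), PadLeftWriteChunk might end up looking roughly like the sketch below; a std::vector<int16_t> converts implicitly to rtc::ArrayView<const int16_t>, so the call sites in Simulate() would not need to change. This is an illustration of the reviewed-for direction, not necessarily the exact code that landed.

    void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
                           size_t interval_begin,
                           WavWriter* wav_writer) {
      // Add left padding, covering the interval during which the speaker is
      // not active. The check replaces the always-true padding_size >= 0.
      RTC_CHECK(wav_writer);
      RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
      const size_t padding_size = interval_begin - wav_writer->num_samples();
      if (padding_size > 0) {  // The branch stays: padding_size may be zero.
        const std::vector<int16_t> padding(padding_size, 0);
        wav_writer->WriteSamples(padding.data(), padding_size);
      }

      // Write source samples.
      wav_writer->WriteSamples(source_samples.data(), source_samples.size());
    }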
| + |
| +// Appends zeros via wav_writer. The number of zeros is always non-negative |
| +// and equal to the difference between duration_samples and the number of |
| +// previously written samples. |
| +void PadRight(WavWriter* wav_writer, std::size_t duration_samples) { |
|
minyue-webrtc
2017/04/07 13:24:30
duration is normally a time concept. Say either du
minyue-webrtc
2017/04/07 13:24:31
This looks similar to PadLeftWriteChunk but functi
AleBzk
2017/04/10 08:24:48
duration_samples -> pad_samples done
PadRight ->
|
| + RTC_CHECK(wav_writer); |
| + std::size_t padding_size = duration_samples - wav_writer->num_samples(); |
| + RTC_CHECK(padding_size >= 0); |
|
minyue-webrtc
2017/04/07 13:24:31
again, checking something always true :)
AleBzk
2017/04/10 08:24:48
Done.
|
| + if (padding_size > 0) { |
| + const std::vector<int16_t> padding(padding_size, 0); |
| + wav_writer->WriteSamples(padding.data(), padding_size); |
| + } |
| +} |
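On the reviewer's remark that PadRight repeats the zero-padding logic of PadLeftWriteChunk, one possible way to share it is a small helper along the lines below; the name PadUpTo is made up for illustration and is not part of the patch. PadLeftWriteChunk would call it before writing its samples, and the right-padding loop at the end of Simulate() could call it directly.

    // Appends zeros via wav_writer until it contains target_num_samples
    // samples in total.
    void PadUpTo(size_t target_num_samples, WavWriter* wav_writer) {
      RTC_CHECK(wav_writer);
      RTC_CHECK_GE(target_num_samples, wav_writer->num_samples());
      const size_t padding_size =
          target_num_samples - wav_writer->num_samples();
      if (padding_size > 0) {
        const std::vector<int16_t> padding(padding_size, 0);
        wav_writer->WriteSamples(padding.data(), padding_size);
      }
    }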
| + |
| +} // namespace |
| + |
| +namespace conversational_speech { |
| + |
| +std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate( |
|
minyue-webrtc
2017/04/07 13:24:30
Are you not gonna write this in any class?
minyue-webrtc
2017/04/07 13:24:31
and same comment on unique_ptr<map>
AleBzk
2017/04/10 08:24:48
Are you not gonna write this in any class?
No. In
|
| + const MultiEndCall& multiend_call, const std::string& output_path) { |
| + // Set output file paths and initialize wav writers. |
| + const auto& speaker_names = multiend_call.speaker_names(); |
| + auto speaker_output_file_paths = InitSpeakerOutputFilePaths( |
| + speaker_names, output_path); |
| + auto speakers_wav_writers = InitSpeakersWavWriters( |
| + *speaker_output_file_paths, multiend_call.sample_rate()); |
| + |
| + // Preload all the input audio tracks. |
| + const auto& audiotrack_readers = multiend_call.audiotrack_readers(); |
| + auto audiotracks = PreloadAudioTracks(audiotrack_readers); |
| + |
| + // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end |
| + // across the 2 speakers are symmetric; hence, the code below could be |
| + // replaced by only creating the near-end or the far-end. However, this would |
| + // require splitting the unit tests and documenting the behavior in README.md. |
| + // In practice, it should not be an issue since the files are not expected to |
| + // be significant in size. |
| + |
| + // Write near-end and far-end output tracks. |
| + const auto& speaking_turns = multiend_call.speaking_turns(); |
| + for (const auto& speaking_turn : speaking_turns) { |
| + const std::string& active_speaker_name = speaking_turn.speaker_name; |
| + auto source_audiotrack = audiotracks->at( |
| + speaking_turn.audiotrack_file_name); |
| + |
| + // Write active speaker's chunk to active speaker's near-end. |
| + PadLeftWriteChunk(source_audiotrack, speaking_turn.begin, |
| + &speakers_wav_writers->at( |
| + active_speaker_name).near_end()); |
| + |
| + // Write active speaker's chunk to other participants' far-ends. |
| + for (const std::string& speaker_name : speaker_names) { |
| + if (speaker_name == active_speaker_name) |
| + continue; |
| + PadLeftWriteChunk(source_audiotrack, speaking_turn.begin, |
| + &speakers_wav_writers->at(speaker_name).far_end()); |
| + } |
| + } |
| + |
| + // Finalize all the output tracks with right padding. |
| + // This is required to make the duration of all the output tracks equal. |
| + std::size_t duration_samples = multiend_call.total_duration_samples(); |
| + for (const std::string& speaker_name : speaker_names) { |
| + PadRight(&speakers_wav_writers->at(speaker_name).near_end(), |
| + duration_samples); |
| + PadRight(&speakers_wav_writers->at(speaker_name).far_end(), |
| + duration_samples); |
| + } |
| + |
| + return speaker_output_file_paths; |
| +} |
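As for keeping Simulate() a free function rather than a class method, a call site stays straightforward; the wrapper below is hypothetical and only illustrates how the returned map of output paths might be consumed (constructing the MultiEndCall itself is defined elsewhere in this CL and is not shown here).

    // Hypothetical helper: runs the simulation and logs where the output
    // tracks of each speaker were written.
    void SimulateAndLogOutputPaths(
        const conversational_speech::MultiEndCall& multiend_call,
        const std::string& output_path) {
      auto output_file_paths =
          conversational_speech::Simulate(multiend_call, output_path);
      for (const auto& speaker_and_paths : *output_file_paths) {
        LOG(LS_INFO) << speaker_and_paths.first
                     << " near-end: " << speaker_and_paths.second.near_end
                     << " far-end: " << speaker_and_paths.second.far_end;
      }
    }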
| + |
| +} // namespace conversational_speech |
| +} // namespace test |
| +} // namespace webrtc |