Index: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
index f16aa753fa427a42b7def1298c145a52f3154afd..ba36514e779e916108b43b5c8bde4bb413bd6b08 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -10,8 +10,7 @@
#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
-#include <utility>
-
+#include "webrtc/base/logging.h"
#include "webrtc/base/pathutils.h"
namespace webrtc {
@@ -25,7 +24,7 @@ MultiEndCall::MultiEndCall(
wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
FindSpeakerNames();
CreateAudioTrackReaders();
-  CheckTiming();
+  valid_ = CheckTiming();
}
MultiEndCall::~MultiEndCall() = default;
@@ -39,6 +38,10 @@ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
return audiotrack_readers_;
}
+bool MultiEndCall::valid() {
+  return valid_;
+}
+
void MultiEndCall::FindSpeakerNames() {
RTC_DCHECK(speaker_names_.empty());
for (const Turn& turn : timing_) {
@@ -65,9 +68,113 @@ void MultiEndCall::CreateAudioTrackReaders() {
}
}
-void MultiEndCall::CheckTiming() {
-  // TODO(alessiob): use audiotrack lengths and offset to check whether the
-  // timing is valid.
+bool MultiEndCall::CheckTiming() const {
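+  // The timing is valid if the turns are correctly ordered, no more than two
+  // speakers talk at the same time, and no speaker overlaps with him/herself.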
+  std::size_t number_of_turns = timing_.size();
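+  // Converts an offset in milliseconds to samples at the given sample rate.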
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    return ms * sr / 1000;
+  };
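+  // Checks whether a timestamp lies in the half-open interval [begin, end).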
+  auto in_interval = [](std::size_t value, const Interval& interval) {
+    return interval.first <= value && value < interval.second;
+  };
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store turn intervals of each speaker (used to detect self
+  // cross-talk).
+  std::map<std::string, std::unique_ptr<IntervalsVector>> speakers_intervals;
+  for (const std::string& speaker_name : speaker_names_) {
+    // Create an interval vector for this speaker.
+    speakers_intervals.insert(std::make_pair(
+        speaker_name, std::unique_ptr<IntervalsVector>(
+            new IntervalsVector())));
+    LOG(LS_VERBOSE) << "speaker_intervals vector for <" << speaker_name
+                    << "> created (capacity: "
+                    << speakers_intervals[speaker_name]->capacity() << ")";
+  }
+
+  // Parse turns.
+  for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn.
+    int offset_samples = millisecond_to_samples(
+        turn.offset, it->second->sample_rate());
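+    // A negative offset makes the current turn overlap the previous one.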
+    std::size_t begin_timestamp = last_turn.second + offset_samples;
+    std::size_t end_timestamp = begin_timestamp + it->second->num_samples();
+    LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
+                 << "-" << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 && -offset_samples > static_cast<int>(
+        last_turn.second - last_turn.first)) {
+      LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
+
+    // Cross-talk with 3 or more speakers occurs when the beginning of the
+    // current turn falls within both of the two previous turns.
+    if (turn_index > 1 && in_interval(begin_timestamp, last_turn) &&
+        in_interval(begin_timestamp, second_last_turn)) {
+      LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+      return false;
+    }
+
+    // Save speaker turn interval.
+    Interval current_turn = {begin_timestamp, end_timestamp};
+    speakers_intervals[turn.speaker_name]->push_back(current_turn);
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn = current_turn;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
+    LOG(LS_INFO) << "checking self cross-talk for <"
+        << speaker_name << ">";
+    if (DetectSelfCrossTalk(speakers_intervals[speaker_name].get())) {
+      LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
+}
+
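+// Returns true if two consecutive turns of the same speaker overlap in time.
+// The intervals are expected to be sorted by begin timestamp.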
+bool MultiEndCall::DetectSelfCrossTalk(IntervalsVector* speaker_intervals)
+    const {
+  Interval previous_interval = speaker_intervals->at(0);
+  LOG(LS_VERBOSE) << "#0: " << previous_interval.first << " "
+      << previous_interval.second;
+  for (std::size_t index = 1; index < speaker_intervals->size(); ++index) {
+    auto interval = speaker_intervals->at(index);
+    LOG(LS_VERBOSE) << "#" << index << ": " << interval.first << " "
+        << interval.second;
+
+    // Check if there is overlap with the previous interval. Since the
+    // intervals are sorted by begin timestamp, checking consecutive pairs is
+    // sufficient to detect any self cross-talk.
+    if (previous_interval.second > interval.first) {
+      return true;
+    }
+
+    // Update and continue with the next interval.
+    previous_interval = interval;
+  }
+  return false;
} |
} // namespace conversational_speech |