Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Unified Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: rebase Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
index f16aa753fa427a42b7def1298c145a52f3154afd..ba36514e779e916108b43b5c8bde4bb413bd6b08 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -10,8 +10,7 @@
#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
-#include <utility>
-
+#include "webrtc/base/logging.h"
#include "webrtc/base/pathutils.h"
namespace webrtc {
@@ -25,7 +24,7 @@ MultiEndCall::MultiEndCall(
wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
FindSpeakerNames();
CreateAudioTrackReaders();
- CheckTiming();
+ valid_ = CheckTiming();
}
MultiEndCall::~MultiEndCall() = default;
@@ -39,6 +38,10 @@ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
return audiotrack_readers_;
}
+bool MultiEndCall::valid() {
+ return valid_;
+}
+
void MultiEndCall::FindSpeakerNames() {
RTC_DCHECK(speaker_names_.empty());
for (const Turn& turn : timing_) {
@@ -65,9 +68,106 @@ void MultiEndCall::CreateAudioTrackReaders() {
}
}
-void MultiEndCall::CheckTiming() {
- // TODO(alessiob): use audiotrack lengths and offset to check whether the
- // timing is valid.
+bool MultiEndCall::CheckTiming() const {
+ std::size_t number_of_turns = timing_.size();
+ auto millisecond_to_samples = [](int ms, int sr) -> int {
+ return ms * sr / 1000;
+ };
+ auto in_interval = [](std::size_t value, const Interval& interval) {
+ return interval.first <= value && value < interval.second;
+ };
+
+ // Begin and end timestamps for the last two turns (unit: number of samples).
+ Interval second_last_turn = {0, 0};
+ Interval last_turn = {0, 0};
+
+ // Initialize map to store turn intervals of each speaker (used to detect self
+ // cross-talk).
+ std::map<std::string, std::unique_ptr<IntervalsVector>> speakers_intervals;
+ for (const std::string& speaker_name : speaker_names_) {
+ // Initialize a vector.
+ speakers_intervals.insert(std::make_pair(
+ speaker_name, std::unique_ptr<IntervalsVector>(
+ new IntervalsVector())));
+ LOG(LS_VERBOSE) << "speaker_intervals vector for <" << speaker_name
+ << "> preallocated (capacity: "
+ << speakers_intervals[speaker_name]->capacity() << ")";
+ }
+
+ // Parse turns.
+ for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+ const Turn& turn = timing_[turn_index];
+ auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+ RTC_CHECK(it != audiotrack_readers_.end())
+ << "Audio track reader not created";
+
+ // Begin and end timestamps for the current turn.
+ int offset_samples = millisecond_to_samples(
+ turn.offset, it->second->sample_rate());
+ std::size_t begin_timestamp = last_turn.second + offset_samples;
+ std::size_t end_timestamp = begin_timestamp + it->second->num_samples();
+ LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
+ << "-" << end_timestamp << " ms";
+
+ // The order is invalid if the offset is negative and its absolute value is
+ // larger than the duration of the previous turn.
+ if (offset_samples < 0 && -offset_samples > int(
+ last_turn.second - last_turn.first)) {
+ LOG(LS_ERROR) << "invalid order";
+ return false;
+ }
+
+ // Cross-talk with 3 or more speakers occurs when the beginning of the
+ // current interval falls in the last two turns.
+ if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
+ && in_interval(begin_timestamp, second_last_turn)) {
+ LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+ return false;
+ }
+
+ // Save speaker turn interval.
+ Interval current_turn = {begin_timestamp, end_timestamp};
+ speakers_intervals[turn.speaker_name]->push_back(current_turn);
+
+ // Update and continue with next turn.
+ second_last_turn = last_turn;
+ last_turn = current_turn;
+ }
+
+ // Detect self cross-talk.
+ for (const std::string& speaker_name : speaker_names_) {
+ LOG(LS_INFO) << "checking self cross-talk for <"
+ << speaker_name << ">";
+ if (DetectSelfCrossTalk(speakers_intervals[speaker_name].get())) {
+ LOG(LS_ERROR) << "Self cross-talk detected";
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool MultiEndCall::DetectSelfCrossTalk(IntervalsVector* speaker_intervals)
+ const {
+ Interval previous_interval = speaker_intervals->at(0);
+ LOG(LS_VERBOSE) << "#0" << ": " << previous_interval.first << " "
+ << previous_interval.second;
+ for (std::size_t index = 1; index < speaker_intervals->size(); ++index) {
+ auto interval = speaker_intervals->at(index);
+ LOG(LS_VERBOSE) << "#" << index << ": " << interval.first << " "
+ << interval.second;
+
+ // Check if there is overlap with the previous interval.
+ // This is a sufficient condition for self cross-talk since the intervals
+ // are sorted by begin timestamp.
+ if (previous_interval.second > interval.first) {
+ return true;
+ }
+
+ // Update and continue with next turn.
+ previous_interval = interval;
+ }
+ return false;
}
} // namespace conversational_speech

Powered by Google App Engine
This is Rietveld 408576698