Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(115)

Unified Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: final refactoring Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
diff --git a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
index f16aa753fa427a42b7def1298c145a52f3154afd..8fe43aa1dbd290507039a788d42504723dd3fba6 100644
--- a/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
+++ b/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc
@@ -10,8 +10,7 @@
#include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
-#include <utility>
-
+#include "webrtc/base/logging.h"
#include "webrtc/base/pathutils.h"
namespace webrtc {
@@ -25,7 +24,7 @@ MultiEndCall::MultiEndCall(
wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
FindSpeakerNames();
CreateAudioTrackReaders();
- CheckTiming();
+ valid_ = CheckTiming();
}
MultiEndCall::~MultiEndCall() = default;
@@ -39,10 +38,23 @@ const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
return audiotrack_readers_;
}
+// Tells whether the timing passed the checks run by CheckTiming() when the
+// instance was constructed.
+bool MultiEndCall::valid() const {
+  return valid_;
+}
+
+// Returns the largest end timestamp, expressed in samples, recorded by
+// CheckTiming() among the turns it parsed.
+std::size_t MultiEndCall::total_duration_samples() const {
+  return total_duration_samples_;
+}
+
+// Gives read-only access to the speaking turns appended by CheckTiming().
+const std::vector<MultiEndCall::SpeakingTurn>&
+MultiEndCall::speaking_turns() const {
+  return speaking_turns_;
+}
+
// Fills speaker_names_ with the speaker name of every turn listed in timing_.
void MultiEndCall::FindSpeakerNames() {
RTC_DCHECK(speaker_names_.empty());  // Must start from an empty container.
for (const Turn& turn : timing_) {
- speaker_names_.insert(turn.speaker_name);
+ speaker_names_.emplace(turn.speaker_name);
}
}
@@ -60,14 +72,118 @@ void MultiEndCall::CreateAudioTrackReaders() {
// Map the audiotrack file name to a new instance of WavReaderInterface.
std::unique_ptr<WavReaderInterface> wavreader =
wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
- audiotrack_readers_.insert(std::make_pair(
- turn.audiotrack_file_name, std::move(wavreader)));
+ audiotrack_readers_.emplace(
+ turn.audiotrack_file_name, std::move(wavreader));
}
}
-void MultiEndCall::CheckTiming() {
- // TODO(alessiob): use audiotrack lengths and offset to check whether the
- // timing is valid.
+// Checks that the timing of the turns is valid and, while parsing the turns,
+// fills speaking_turns_ and total_duration_samples_. All the timestamps are
+// expressed in number of samples.
+//
+// Returns false as soon as one of the following conditions is found:
+// - invalid order: a negative offset places the beginning of a turn before the
+//   beginning of the previous turn;
+// - cross-talk with 3+ speakers: a turn begins while both of the two previous
+//   turns are still ongoing;
+// - self cross-talk: two turns of the same speaker overlap.
+// Returns true otherwise.
+bool MultiEndCall::CheckTiming() {
+  struct Interval {
+    std::size_t begin;
hlundin-webrtc 2017/04/06 08:10:04 size_t Here and below.
AleBzk 2017/04/06 16:42:42 Done.
+    std::size_t end;
+  };
+  std::size_t number_of_turns = timing_.size();
+  // Converts |ms| milliseconds into a number of samples at the sampling rate
+  // |sr|. The product is computed in a wider type (long long) because
+  // |ms| * |sr| overflows a 32-bit int already for offsets of about 45 seconds
+  // at 48 kHz.
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    return static_cast<int>(static_cast<long long>(ms) * sr / 1000);
hlundin-webrtc 2017/04/06 08:10:04 I'd recommend rtc::CheckedDivExact(sr, 1000)
AleBzk 2017/04/06 16:42:42 If I do that, the tool won't work if the sampling
hlundin-webrtc 2017/04/07 10:24:09 Oh, the tool should be able to handle other rates
AleBzk 2017/04/07 11:37:06 Done.
+  };
+  // True if |value| falls in the half-open interval [begin, end).
+  auto in_interval = [](std::size_t value, const Interval& interval) {
+    return interval.begin <= value && value < interval.end;
+  };
+  total_duration_samples_ = 0;
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store speaking turn indices of each speaker (used to
+  // detect self cross-talk).
+  std::map<std::string, std::vector<std::size_t>> speaking_turn_indices;
+  for (const std::string& speaker_name : speaker_names_) {
+    speaking_turn_indices.emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple());
+  }
+
+  // Parse turns.
+  for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
hlundin-webrtc 2017/04/06 08:10:04 RTC_CHECK_NE
AleBzk 2017/04/06 16:42:42 RTC_CHECK_NE(it, audiotrack_readers_.end()) raises
hlundin-webrtc 2017/04/07 10:24:09 Hmm. Boring. Keep this as is then.
AleBzk 2017/04/07 11:37:06 Acknowledged.
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn. A negative offset is
+    // applied through unsigned wrap-around; offsets reaching before the
+    // beginning of the previous turn are rejected by the order check below.
+    int offset_samples = millisecond_to_samples(
+        turn.offset, it->second->sample_rate());
+    std::size_t begin_timestamp = last_turn.end + offset_samples;
+    std::size_t end_timestamp = begin_timestamp + it->second->num_samples();
+    // The logged timestamps are sample counts, not milliseconds.
+    LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
+                 << "-" << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 && -offset_samples > static_cast<int>(
hlundin-webrtc 2017/04/06 08:10:04 static_cast<int>(last_turn.end - last_turn.begin)
AleBzk 2017/04/06 16:42:42 Done.
+        last_turn.end - last_turn.begin)) {
+      LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
+
+    // Cross-talk with 3 or more speakers occurs when the beginning of the
+    // current interval falls in the last two turns.
+    if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
+        && in_interval(begin_timestamp, second_last_turn)) {
+      LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+      return false;
+    }
+
+    // Append turn.
+    speaking_turns_.emplace_back(
+        turn.speaker_name, turn.audiotrack_file_name,
+        begin_timestamp, end_timestamp);
+
+    // Save speaking turn index for self cross-talk detection.
+    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
hlundin-webrtc 2017/04/06 08:10:04 You are relying on an implicit assumption that tur
AleBzk 2017/04/06 16:42:42 Done.
+
+    // Update total duration of the conversational speech.
+    if (total_duration_samples_ < end_timestamp)
+      total_duration_samples_ = end_timestamp;
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn.begin = begin_timestamp;
+    last_turn.end = end_timestamp;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
hlundin-webrtc 2017/04/06 08:10:04 The speaking_turn_indices variable is only used fo
AleBzk 2017/04/06 16:42:42 Cool! Happy to learn about std::copy_if and std::a
+    LOG(LS_INFO) << "checking self cross-talk for <"
+                 << speaker_name << ">";
+    if (DetectSelfCrossTalk(speaking_turn_indices[speaker_name])) {
hlundin-webrtc 2017/04/06 08:10:04 It is a bit tricky to use the map::[] operator her
AleBzk 2017/04/06 16:42:42 I'll go the way you suggested in your previous com
+      LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Tells whether two consecutive speaking turns, among those referenced by
+// |speaking_turn_indices| (indices into speaking_turns_), overlap in time.
+bool MultiEndCall::DetectSelfCrossTalk(
+    const std::vector<std::size_t>& speaking_turn_indices) const {
+  // With fewer than two turns there is no pair to compare.
+  if (speaking_turn_indices.size() < 2) {
+    return false;
+  }
+
+  // Walk the pairs of adjacent turns. Since the intervals are sorted by begin
+  // timestamp, an overlap between neighbours is a sufficient condition for
+  // self cross-talk.
+  auto earlier = speaking_turn_indices.begin();
+  for (auto later = earlier + 1; later != speaking_turn_indices.end();
+       ++earlier, ++later) {
+    const SpeakingTurn& earlier_turn = speaking_turns_[*earlier];
+    const SpeakingTurn& later_turn = speaking_turns_[*later];
+    if (later_turn.begin < earlier_turn.end) {
+      return true;
+    }
+  }
+  return false;
}
} // namespace conversational_speech

Powered by Google App Engine
This is Rietveld 408576698