Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1011)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: rebase Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
12 12
13 #include <utility> 13 #include "webrtc/base/logging.h"
14
15 #include "webrtc/base/pathutils.h" 14 #include "webrtc/base/pathutils.h"
16 15
17 namespace webrtc { 16 namespace webrtc {
18 namespace test { 17 namespace test {
19 namespace conversational_speech { 18 namespace conversational_speech {
20 19
21 MultiEndCall::MultiEndCall( 20 MultiEndCall::MultiEndCall(
22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path, 21 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,
23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory) 22 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
24 : timing_(timing), audiotracks_path_(audiotracks_path), 23 : timing_(timing), audiotracks_path_(audiotracks_path),
25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) { 24 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
26 FindSpeakerNames(); 25 FindSpeakerNames();
27 CreateAudioTrackReaders(); 26 CreateAudioTrackReaders();
28 CheckTiming(); 27 valid_ = CheckTiming();
29 } 28 }
30 29
31 MultiEndCall::~MultiEndCall() = default; 30 MultiEndCall::~MultiEndCall() = default;
32 31
33 const std::set<std::string>& MultiEndCall::speaker_names() const { 32 const std::set<std::string>& MultiEndCall::speaker_names() const {
34 return speaker_names_; 33 return speaker_names_;
35 } 34 }
36 35
37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>& 36 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
38 MultiEndCall::audiotrack_readers() const { 37 MultiEndCall::audiotrack_readers() const {
39 return audiotrack_readers_; 38 return audiotrack_readers_;
40 } 39 }
41 40
41 bool MultiEndCall::valid() {
42 return valid_;
43 }
44
42 void MultiEndCall::FindSpeakerNames() { 45 void MultiEndCall::FindSpeakerNames() {
43 RTC_DCHECK(speaker_names_.empty()); 46 RTC_DCHECK(speaker_names_.empty());
44 for (const Turn& turn : timing_) { 47 for (const Turn& turn : timing_) {
45 speaker_names_.insert(turn.speaker_name); 48 speaker_names_.insert(turn.speaker_name);
46 } 49 }
47 } 50 }
48 51
49 void MultiEndCall::CreateAudioTrackReaders() { 52 void MultiEndCall::CreateAudioTrackReaders() {
50 RTC_DCHECK(audiotrack_readers_.empty()); 53 RTC_DCHECK(audiotrack_readers_.empty());
51 for (const Turn& turn : timing_) { 54 for (const Turn& turn : timing_) {
52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name); 55 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
53 if (it != audiotrack_readers_.end()) 56 if (it != audiotrack_readers_.end())
54 continue; 57 continue;
55 58
56 // Instance Pathname to retrieve the full path to the audiotrack file. 59 // Instance Pathname to retrieve the full path to the audiotrack file.
57 const rtc::Pathname audiotrack_file_path( 60 const rtc::Pathname audiotrack_file_path(
58 audiotracks_path_, turn.audiotrack_file_name); 61 audiotracks_path_, turn.audiotrack_file_name);
59 62
60 // Map the audiotrack file name to a new instance of WavReaderInterface. 63 // Map the audiotrack file name to a new instance of WavReaderInterface.
61 std::unique_ptr<WavReaderInterface> wavreader = 64 std::unique_ptr<WavReaderInterface> wavreader =
62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname()); 65 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
63 audiotrack_readers_.insert(std::make_pair( 66 audiotrack_readers_.insert(std::make_pair(
64 turn.audiotrack_file_name, std::move(wavreader))); 67 turn.audiotrack_file_name, std::move(wavreader)));
65 } 68 }
66 } 69 }
67 70
68 void MultiEndCall::CheckTiming() { 71 bool MultiEndCall::CheckTiming() const {
69 // TODO(alessiob): use audiotrack lengths and offset to check whether the 72 std::size_t number_of_turns = timing_.size();
70 // timing is valid. 73 auto millisecond_to_samples = [](int ms, int sr) -> int {
74 return ms * sr / 1000;
75 };
76 auto in_interval = [](std::size_t value, const Interval& interval) {
77 return interval.first <= value && value < interval.second;
78 };
79
80 // Begin and end timestamps for the last two turns (unit: number of samples).
81 Interval second_last_turn = {0, 0};
82 Interval last_turn = {0, 0};
83
84 // Initialize map to store turn intervals of each speaker (used to detect self
85 // cross-talk).
86 std::map<std::string, std::unique_ptr<IntervalsVector>> speakers_intervals;
87 for (const std::string& speaker_name : speaker_names_) {
88 // Initialize a vector.
89 speakers_intervals.insert(std::make_pair(
90 speaker_name, std::unique_ptr<IntervalsVector>(
91 new IntervalsVector())));
92 LOG(LS_VERBOSE) << "speaker_intervals vector for <" << speaker_name
93 << "> preallocated (capacity: "
94 << speakers_intervals[speaker_name]->capacity() << ")";
95 }
96
97 // Parse turns.
98 for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
99 const Turn& turn = timing_[turn_index];
100 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
101 RTC_CHECK(it != audiotrack_readers_.end())
102 << "Audio track reader not created";
103
104 // Begin and end timestamps for the current turn.
105 int offset_samples = millisecond_to_samples(
106 turn.offset, it->second->sample_rate());
107 std::size_t begin_timestamp = last_turn.second + offset_samples;
108 std::size_t end_timestamp = begin_timestamp + it->second->num_samples();
109 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
110 << "-" << end_timestamp << " ms";
111
112 // The order is invalid if the offset is negative and its absolute value is
113 // larger than the duration of the previous turn.
114 if (offset_samples < 0 && -offset_samples > int(
115 last_turn.second - last_turn.first)) {
116 LOG(LS_ERROR) << "invalid order";
117 return false;
118 }
119
120 // Cross-talk with 3 or more speakers occurs when the beginning of the
121 // current interval falls within both of the last two turns.
122 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
123 && in_interval(begin_timestamp, second_last_turn)) {
124 LOG(LS_ERROR) << "cross-talk with 3+ speakers";
125 return false;
126 }
127
128 // Save speaker turn interval.
129 Interval current_turn = {begin_timestamp, end_timestamp};
130 speakers_intervals[turn.speaker_name]->push_back(current_turn);
131
132 // Update and continue with next turn.
133 second_last_turn = last_turn;
134 last_turn = current_turn;
135 }
136
137 // Detect self cross-talk.
138 for (const std::string& speaker_name : speaker_names_) {
139 LOG(LS_INFO) << "checking self cross-talk for <"
140 << speaker_name << ">";
141 if (DetectSelfCrossTalk(speakers_intervals[speaker_name].get())) {
142 LOG(LS_ERROR) << "Self cross-talk detected";
143 return false;
144 }
145 }
146
147 return true;
148 }
149
150 bool MultiEndCall::DetectSelfCrossTalk(IntervalsVector* speaker_intervals)
151 const {
152 Interval previous_interval = speaker_intervals->at(0);
153 LOG(LS_VERBOSE) << "#0" << ": " << previous_interval.first << " "
154 << previous_interval.second;
155 for (std::size_t index = 1; index < speaker_intervals->size(); ++index) {
156 auto interval = speaker_intervals->at(index);
157 LOG(LS_VERBOSE) << "#" << index << ": " << interval.first << " "
158 << interval.second;
159
160 // Check if there is overlap with the previous interval.
161 // This is a sufficient condition for self cross-talk since the intervals
162 // are sorted by begin timestamp.
163 if (previous_interval.second > interval.first) {
164 return true;
165 }
166
167 // Update and continue with next turn.
168 previous_interval = interval;
169 }
170 return false;
71 } 171 }
72 172
73 } // namespace conversational_speech 173 } // namespace conversational_speech
74 } // namespace test 174 } // namespace test
75 } // namespace webrtc 175 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698