Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: final refactoring Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
12 12
13 #include <utility> 13 #include "webrtc/base/logging.h"
14
15 #include "webrtc/base/pathutils.h" 14 #include "webrtc/base/pathutils.h"
16 15
17 namespace webrtc { 16 namespace webrtc {
18 namespace test { 17 namespace test {
19 namespace conversational_speech { 18 namespace conversational_speech {
20 19
21 MultiEndCall::MultiEndCall( 20 MultiEndCall::MultiEndCall(
22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path, 21 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,
23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory) 22 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
24 : timing_(timing), audiotracks_path_(audiotracks_path), 23 : timing_(timing), audiotracks_path_(audiotracks_path),
25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) { 24 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
26 FindSpeakerNames(); 25 FindSpeakerNames();
27 CreateAudioTrackReaders(); 26 CreateAudioTrackReaders();
28 CheckTiming(); 27 valid_ = CheckTiming();
29 } 28 }
30 29
31 MultiEndCall::~MultiEndCall() = default; 30 MultiEndCall::~MultiEndCall() = default;
32 31
33 const std::set<std::string>& MultiEndCall::speaker_names() const { 32 const std::set<std::string>& MultiEndCall::speaker_names() const {
34 return speaker_names_; 33 return speaker_names_;
35 } 34 }
36 35
37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>& 36 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
38 MultiEndCall::audiotrack_readers() const { 37 MultiEndCall::audiotrack_readers() const {
39 return audiotrack_readers_; 38 return audiotrack_readers_;
40 } 39 }
41 40
41 bool MultiEndCall::valid() const {
42 return valid_;
43 }
44
45 std::size_t MultiEndCall::total_duration_samples() const {
46 return total_duration_samples_;
47 }
48
49 const std::vector<MultiEndCall::SpeakingTurn>& MultiEndCall::speaking_turns()
50 const {
51 return speaking_turns_;
52 }
53
42 void MultiEndCall::FindSpeakerNames() { 54 void MultiEndCall::FindSpeakerNames() {
43 RTC_DCHECK(speaker_names_.empty()); 55 RTC_DCHECK(speaker_names_.empty());
44 for (const Turn& turn : timing_) { 56 for (const Turn& turn : timing_) {
45 speaker_names_.insert(turn.speaker_name); 57 speaker_names_.emplace(turn.speaker_name);
46 } 58 }
47 } 59 }
48 60
49 void MultiEndCall::CreateAudioTrackReaders() { 61 void MultiEndCall::CreateAudioTrackReaders() {
50 RTC_DCHECK(audiotrack_readers_.empty()); 62 RTC_DCHECK(audiotrack_readers_.empty());
51 for (const Turn& turn : timing_) { 63 for (const Turn& turn : timing_) {
52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name); 64 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
53 if (it != audiotrack_readers_.end()) 65 if (it != audiotrack_readers_.end())
54 continue; 66 continue;
55 67
56 // Instantiate Pathname to retrieve the full path to the audiotrack file. 68 // Instantiate Pathname to retrieve the full path to the audiotrack file.
57 const rtc::Pathname audiotrack_file_path( 69 const rtc::Pathname audiotrack_file_path(
58 audiotracks_path_, turn.audiotrack_file_name); 70 audiotracks_path_, turn.audiotrack_file_name);
59 71
60 // Map the audiotrack file name to a new instance of WavReaderInterface. 72 // Map the audiotrack file name to a new instance of WavReaderInterface.
61 std::unique_ptr<WavReaderInterface> wavreader = 73 std::unique_ptr<WavReaderInterface> wavreader =
62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname()); 74 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
63 audiotrack_readers_.insert(std::make_pair( 75 audiotrack_readers_.emplace(
64 turn.audiotrack_file_name, std::move(wavreader))); 76 turn.audiotrack_file_name, std::move(wavreader));
65 } 77 }
66 } 78 }
67 79
68 void MultiEndCall::CheckTiming() { 80 bool MultiEndCall::CheckTiming() {
69 // TODO(alessiob): use audiotrack lengths and offset to check whether the 81 struct Interval {
70 // timing is valid. 82 std::size_t begin;
hlundin-webrtc 2017/04/06 08:10:04 size_t Here and below.
AleBzk 2017/04/06 16:42:42 Done.
83 std::size_t end;
84 };
85 std::size_t number_of_turns = timing_.size();
86 auto millisecond_to_samples = [](int ms, int sr) -> int {
87 return ms * sr / 1000;
hlundin-webrtc 2017/04/06 08:10:04 I'd recommend rtc::CheckedDivExact(sr, 1000)
AleBzk 2017/04/06 16:42:42 If I do that, the tool won't work if the sampling
hlundin-webrtc 2017/04/07 10:24:09 Oh, the tool should be able to handle other rates
AleBzk 2017/04/07 11:37:06 Done.
88 };
89 auto in_interval = [](std::size_t value, const Interval& interval) {
90 return interval.begin <= value && value < interval.end;
91 };
92 total_duration_samples_ = 0;
93
94 // Begin and end timestamps for the last two turns (unit: number of samples).
95 Interval second_last_turn = {0, 0};
96 Interval last_turn = {0, 0};
97
98 // Initialize map to store speaking turn indices of each speaker (used to
99 // detect self cross-talk).
100 std::map<std::string, std::vector<std::size_t>> speaking_turn_indices;
101 for (const std::string& speaker_name : speaker_names_) {
102 speaking_turn_indices.emplace(
103 std::piecewise_construct,
104 std::forward_as_tuple(speaker_name),
105 std::forward_as_tuple());
106 }
107
108 // Parse turns.
109 for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
110 const Turn& turn = timing_[turn_index];
111 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
112 RTC_CHECK(it != audiotrack_readers_.end())
hlundin-webrtc 2017/04/06 08:10:04 RTC_CHECK_NE
AleBzk 2017/04/06 16:42:42 RTC_CHECK_NE(it, audiotrack_readers_.end()) raises
hlundin-webrtc 2017/04/07 10:24:09 Hmm. Boring. Keep this as is then.
AleBzk 2017/04/07 11:37:06 Acknowledged.
113 << "Audio track reader not created";
114
115 // Begin and end timestamps for the current turn.
116 int offset_samples = millisecond_to_samples(
117 turn.offset, it->second->sample_rate());
118 std::size_t begin_timestamp = last_turn.end + offset_samples;
119 std::size_t end_timestamp = begin_timestamp + it->second->num_samples();
120 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
121 << "-" << end_timestamp << " ms";
122
123 // The order is invalid if the offset is negative and its absolute value is
124 // larger than the duration of the previous turn.
125 if (offset_samples < 0 && -offset_samples > int(
hlundin-webrtc 2017/04/06 08:10:04 static_cast<int>(last_turn.end - last_turn.begin)
AleBzk 2017/04/06 16:42:42 Done.
126 last_turn.end - last_turn.begin)) {
127 LOG(LS_ERROR) << "invalid order";
128 return false;
129 }
130
131 // Cross-talk with 3 or more speakers occurs when the beginning of the
132 // current interval falls in the last two turns.
133 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
134 && in_interval(begin_timestamp, second_last_turn)) {
135 LOG(LS_ERROR) << "cross-talk with 3+ speakers";
136 return false;
137 }
138
139 // Append turn.
140 speaking_turns_.emplace_back(
141 turn.speaker_name, turn.audiotrack_file_name,
142 begin_timestamp, end_timestamp);
143
144 // Save speaking turn index for self cross-talk detection.
145 speaking_turn_indices[turn.speaker_name].push_back(turn_index);
hlundin-webrtc 2017/04/06 08:10:04 You are relying on an implicit assumption that tur
AleBzk 2017/04/06 16:42:42 Done.
146
147 // Update total duration of the conversational speech.
148 if (total_duration_samples_ < end_timestamp)
149 total_duration_samples_ = end_timestamp;
150
151 // Update and continue with next turn.
152 second_last_turn = last_turn;
153 last_turn.begin = begin_timestamp;
154 last_turn.end = end_timestamp;
155 }
156
157 // Detect self cross-talk.
158 for (const std::string& speaker_name : speaker_names_) {
hlundin-webrtc 2017/04/06 08:10:04 The speaking_turn_indices variable is only used fo
AleBzk 2017/04/06 16:42:42 Cool! Happy to learn about std::copy_if and std::a
159 LOG(LS_INFO) << "checking self cross-talk for <"
160 << speaker_name << ">";
161 if (DetectSelfCrossTalk(speaking_turn_indices[speaker_name])) {
hlundin-webrtc 2017/04/06 08:10:04 It is a bit tricky to use the map::[] operator her
AleBzk 2017/04/06 16:42:42 I'll go the way you suggested in your previous com
162 LOG(LS_ERROR) << "Self cross-talk detected";
163 return false;
164 }
165 }
166
167 return true;
168 }
169
170 bool MultiEndCall::DetectSelfCrossTalk(
171 const std::vector<std::size_t>& speaking_turn_indices) const {
172 // Compare adjacent speaking turn pairs.
173 for (std::size_t index = 1; index < speaking_turn_indices.size(); ++index) {
174 const SpeakingTurn& previous_interval = speaking_turns_[
175 speaking_turn_indices[index - 1]];
176 const SpeakingTurn& interval = speaking_turns_[
177 speaking_turn_indices[index]];
178
179 // Check if there is overlap with the previous interval.
180 // This is a sufficient condition for self cross-talk since the intervals
181 // are sorted by begin timestamp.
182 if (previous_interval.end > interval.begin) {
183 return true;
184 }
185 }
186 return false;
71 } 187 }
72 188
73 } // namespace conversational_speech 189 } // namespace conversational_speech
74 } // namespace test 190 } // namespace test
75 } // namespace webrtc 191 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698