Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(206)

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)
Patch Set: missing include to get std::back_inserter working on win targets Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h" 11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"
12 12
13 #include <utility> 13 #include <algorithm>
14 #include <iterator>
14 15
16 #include "webrtc/base/logging.h"
15 #include "webrtc/base/pathutils.h" 17 #include "webrtc/base/pathutils.h"
16 18
17 namespace webrtc { 19 namespace webrtc {
18 namespace test { 20 namespace test {
19 namespace conversational_speech { 21 namespace conversational_speech {
20 22
21 MultiEndCall::MultiEndCall( 23 MultiEndCall::MultiEndCall(
22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path, 24 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,
23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory) 25 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
24 : timing_(timing), audiotracks_path_(audiotracks_path), 26 : timing_(timing), audiotracks_path_(audiotracks_path),
25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) { 27 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {
26 FindSpeakerNames(); 28 FindSpeakerNames();
27 CreateAudioTrackReaders(); 29 CreateAudioTrackReaders();
28 CheckTiming(); 30 valid_ = CheckTiming();
29 } 31 }
30 32
31 MultiEndCall::~MultiEndCall() = default; 33 MultiEndCall::~MultiEndCall() = default;
32 34
33 const std::set<std::string>& MultiEndCall::speaker_names() const { 35 const std::set<std::string>& MultiEndCall::speaker_names() const {
34 return speaker_names_; 36 return speaker_names_;
35 } 37 }
36 38
37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>& 39 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
38 MultiEndCall::audiotrack_readers() const { 40 MultiEndCall::audiotrack_readers() const {
39 return audiotrack_readers_; 41 return audiotrack_readers_;
40 } 42 }
41 43
44 bool MultiEndCall::valid() const {
45 return valid_;
46 }
47
48 size_t MultiEndCall::total_duration_samples() const {
49 return total_duration_samples_;
50 }
51
52 const std::vector<MultiEndCall::SpeakingTurn>& MultiEndCall::speaking_turns()
53 const {
54 return speaking_turns_;
55 }
56
42 void MultiEndCall::FindSpeakerNames() { 57 void MultiEndCall::FindSpeakerNames() {
43 RTC_DCHECK(speaker_names_.empty()); 58 RTC_DCHECK(speaker_names_.empty());
44 for (const Turn& turn : timing_) { 59 for (const Turn& turn : timing_) {
45 speaker_names_.insert(turn.speaker_name); 60 speaker_names_.emplace(turn.speaker_name);
46 } 61 }
47 } 62 }
48 63
49 void MultiEndCall::CreateAudioTrackReaders() { 64 void MultiEndCall::CreateAudioTrackReaders() {
50 RTC_DCHECK(audiotrack_readers_.empty()); 65 RTC_DCHECK(audiotrack_readers_.empty());
51 for (const Turn& turn : timing_) { 66 for (const Turn& turn : timing_) {
52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name); 67 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
53 if (it != audiotrack_readers_.end()) 68 if (it != audiotrack_readers_.end())
54 continue; 69 continue;
55 70
56 // Instance Pathname to retrieve the full path to the audiotrack file. 71 // Instance Pathname to retrieve the full path to the audiotrack file.
57 const rtc::Pathname audiotrack_file_path( 72 const rtc::Pathname audiotrack_file_path(
58 audiotracks_path_, turn.audiotrack_file_name); 73 audiotracks_path_, turn.audiotrack_file_name);
59 74
60 // Map the audiotrack file name to a new instance of WavReaderInterface. 75 // Map the audiotrack file name to a new instance of WavReaderInterface.
61 std::unique_ptr<WavReaderInterface> wavreader = 76 std::unique_ptr<WavReaderInterface> wavreader =
62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname()); 77 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
63 audiotrack_readers_.insert(std::make_pair( 78 audiotrack_readers_.emplace(
64 turn.audiotrack_file_name, std::move(wavreader))); 79 turn.audiotrack_file_name, std::move(wavreader));
65 } 80 }
66 } 81 }
67 82
68 void MultiEndCall::CheckTiming() { 83 bool MultiEndCall::CheckTiming() {
69 // TODO(alessiob): use audiotrack lengths and offset to check whether the 84 struct Interval {
70 // timing is valid. 85 size_t begin;
86 size_t end;
87 };
88 size_t number_of_turns = timing_.size();
89 auto millisecond_to_samples = [](int ms, int sr) -> int {
90 // Truncation may happen if the sampling rate is not an integer multiple
91 // of 1000 (e.g., 44100).
92 return ms * sr / 1000;
93 };
94 auto in_interval = [](size_t value, const Interval& interval) {
95 return interval.begin <= value && value < interval.end;
96 };
97 total_duration_samples_ = 0;
98 speaking_turns_.clear();
99
100 // Begin and end timestamps for the last two turns (unit: number of samples).
101 Interval second_last_turn = {0, 0};
102 Interval last_turn = {0, 0};
103
104 // Initialize map to store speaking turn indices of each speaker (used to
105 // detect self cross-talk).
106 std::map<std::string, std::vector<size_t>> speaking_turn_indices;
107 for (const std::string& speaker_name : speaker_names_) {
108 speaking_turn_indices.emplace(
109 std::piecewise_construct,
110 std::forward_as_tuple(speaker_name),
111 std::forward_as_tuple());
112 }
113
114 // Parse turns.
115 for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
116 const Turn& turn = timing_[turn_index];
117 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
118 RTC_CHECK(it != audiotrack_readers_.end())
119 << "Audio track reader not created";
120
121 // Begin and end timestamps for the current turn.
122 int offset_samples = millisecond_to_samples(
123 turn.offset, it->second->sample_rate());
124 size_t begin_timestamp = last_turn.end + offset_samples;
125 size_t end_timestamp = begin_timestamp + it->second->num_samples();
126 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp
127 << "-" << end_timestamp << " ms";
128
129 // The order is invalid if the offset is negative and its absolute value is
130 // larger than the duration of the previous turn.
131 if (offset_samples < 0 && -offset_samples > static_cast<int>(
132 last_turn.end - last_turn.begin)) {
133 LOG(LS_ERROR) << "invalid order";
134 return false;
135 }
136
137 // Cross-talk with 3 or more speakers occurs when the beginning of the
138 // current interval falls in the last two turns.
139 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
140 && in_interval(begin_timestamp, second_last_turn)) {
141 LOG(LS_ERROR) << "cross-talk with 3+ speakers";
142 return false;
143 }
144
145 // Append turn.
146 speaking_turns_.emplace_back(
147 turn.speaker_name, turn.audiotrack_file_name,
148 begin_timestamp, end_timestamp);
149
150 // Save speaking turn index for self cross-talk detection.
151 RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
152 speaking_turn_indices[turn.speaker_name].push_back(turn_index);
153
154 // Update total duration of the conversational speech.
155 if (total_duration_samples_ < end_timestamp)
156 total_duration_samples_ = end_timestamp;
157
158 // Update and continue with next turn.
159 second_last_turn = last_turn;
160 last_turn.begin = begin_timestamp;
161 last_turn.end = end_timestamp;
162 }
163
164 // Detect self cross-talk.
165 for (const std::string& speaker_name : speaker_names_) {
166 LOG(LS_INFO) << "checking self cross-talk for <"
167 << speaker_name << ">";
168
169 // Copy all turns for this speaker to new vector.
170 std::vector<SpeakingTurn> speaking_turns_for_name;
171 std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
172 std::back_inserter(speaking_turns_for_name),
173 [&speaker_name](const SpeakingTurn& st){
174 return st.speaker_name == speaker_name; });
175
176 // Check for overlap between adjacent elements.
177 // This is a sufficient condition for self cross-talk since the intervals
178 // are sorted by begin timestamp.
179 auto overlap = std::adjacent_find(
180 speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
181 [](const SpeakingTurn& a, const SpeakingTurn& b) {
182 return a.end > b.begin; });
183
184 if (overlap != speaking_turns_for_name.end()) {
185 LOG(LS_ERROR) << "Self cross-talk detected";
186 return false;
187 }
188 }
189
190 return true;
71 } 191 }
72 192
73 } // namespace conversational_speech 193 } // namespace conversational_speech
74 } // namespace test 194 } // namespace test
75 } // namespace webrtc 195 } // namespace webrtc
OLD NEW
« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698