webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc - Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)

Patch Set: comments from Henrik addressed Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h ('K') | « webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

12	12

13 #include <utility>	13 #include <algorithm>

14	14

	15 #include "webrtc/base/logging.h"

15 #include "webrtc/base/pathutils.h"	16 #include "webrtc/base/pathutils.h"

16	17

17 namespace webrtc {	18 namespace webrtc {

18 namespace test {	19 namespace test {

19 namespace conversational_speech {	20 namespace conversational_speech {

20	21

21 MultiEndCall::MultiEndCall(	22 MultiEndCall::MultiEndCall(

22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,	23 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,

23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)	24 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)

24 : timing_(timing), audiotracks_path_(audiotracks_path),	25 : timing_(timing), audiotracks_path_(audiotracks_path),

25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {	26 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {

26 FindSpeakerNames();	27 FindSpeakerNames();

27 CreateAudioTrackReaders();	28 CreateAudioTrackReaders();

28 CheckTiming();	29 valid_ = CheckTiming();

29 }	30 }

30	31

31 MultiEndCall::~MultiEndCall() = default;	32 MultiEndCall::~MultiEndCall() = default;

32	33

33 const std::set<std::string>& MultiEndCall::speaker_names() const {	34 const std::set<std::string>& MultiEndCall::speaker_names() const {

34 return speaker_names_;	35 return speaker_names_;

35 }	36 }

36	37

37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&	38 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&

38 MultiEndCall::audiotrack_readers() const {	39 MultiEndCall::audiotrack_readers() const {

39 return audiotrack_readers_;	40 return audiotrack_readers_;

40 }	41 }

41	42

	43 bool MultiEndCall::valid() const {

	44 return valid_;

	45 }

	46

	47 size_t MultiEndCall::total_duration_samples() const {

	48 return total_duration_samples_;

	49 }

	50

	51 const std::vector<MultiEndCall::SpeakingTurn>& MultiEndCall::speaking_turns()

	52 const {

	53 return speaking_turns_;

	54 }

	55

42 void MultiEndCall::FindSpeakerNames() {	56 void MultiEndCall::FindSpeakerNames() {

43 RTC_DCHECK(speaker_names_.empty());	57 RTC_DCHECK(speaker_names_.empty());

44 for (const Turn& turn : timing_) {	58 for (const Turn& turn : timing_) {

45 speaker_names_.insert(turn.speaker_name);	59 speaker_names_.emplace(turn.speaker_name);

46 }	60 }

47 }	61 }

48	62

49 void MultiEndCall::CreateAudioTrackReaders() {	63 void MultiEndCall::CreateAudioTrackReaders() {

50 RTC_DCHECK(audiotrack_readers_.empty());	64 RTC_DCHECK(audiotrack_readers_.empty());

51 for (const Turn& turn : timing_) {	65 for (const Turn& turn : timing_) {

52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);	66 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

53 if (it != audiotrack_readers_.end())	67 if (it != audiotrack_readers_.end())

54 continue;	68 continue;

55	69

56 // Instance Pathname to retrieve the full path to the audiotrack file.	70 // Instance Pathname to retrieve the full path to the audiotrack file.

57 const rtc::Pathname audiotrack_file_path(	71 const rtc::Pathname audiotrack_file_path(

58 audiotracks_path_, turn.audiotrack_file_name);	72 audiotracks_path_, turn.audiotrack_file_name);

59	73

60 // Map the audiotrack file name to a new instance of WavReaderInterface.	74 // Map the audiotrack file name to a new instance of WavReaderInterface.

61 std::unique_ptr<WavReaderInterface> wavreader =	75 std::unique_ptr<WavReaderInterface> wavreader =

62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());	76 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());

63 audiotrack_readers_.insert(std::make_pair(	77 audiotrack_readers_.emplace(

64 turn.audiotrack_file_name, std::move(wavreader)));	78 turn.audiotrack_file_name, std::move(wavreader));

65 }	79 }

66 }	80 }

67	81

68 void MultiEndCall::CheckTiming() {	82 bool MultiEndCall::CheckTiming() {

69 // TODO(alessiob): use audiotrack lengths and offset to check whether the	83 struct Interval {

70 // timing is valid.	84 size_t begin;

	85 size_t end;

	86 };

	87 size_t number_of_turns = timing_.size();

	88 auto millisecond_to_samples = [](int ms, int sr) -> int {

	89 // Truncation may happen if the sampling rate is not an integer multiple

	90 // of 1000 (e.g., 44100).

	91 return ms * sr / 1000;

	92 };

	93 auto in_interval = [](size_t value, const Interval& interval) {

	94 return interval.begin <= value && value < interval.end;

	95 };

	96 total_duration_samples_ = 0;

	97 speaking_turns_.clear();

	98

	99 // Begin and end timestamps for the last two turns (unit: number of samples).

	100 Interval second_last_turn = {0, 0};

	101 Interval last_turn = {0, 0};

	102

	103 // Initialize map to store speaking turn indices of each speaker (used to

	104 // detect self cross-talk).

	105 std::map<std::string, std::vector<size_t>> speaking_turn_indices;

	106 for (const std::string& speaker_name : speaker_names_) {

	107 speaking_turn_indices.emplace(

	108 std::piecewise_construct,

	109 std::forward_as_tuple(speaker_name),

	110 std::forward_as_tuple());

	111 }

	112

	113 // Parse turns.

	114 for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {

	115 const Turn& turn = timing_[turn_index];

	116 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

	117 RTC_CHECK(it != audiotrack_readers_.end())

	118 << "Audio track reader not created";

	119

	120 // Begin and end timestamps for the current turn.

	121 int offset_samples = millisecond_to_samples(

	122 turn.offset, it->second->sample_rate());

	123 size_t begin_timestamp = last_turn.end + offset_samples;

	124 size_t end_timestamp = begin_timestamp + it->second->num_samples();

	125 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp

	126 << "-" << end_timestamp << " ms";

	127

	128 // The order is invalid if the offset is negative and its absolute value is

	129 // larger than the duration of the previous turn.

	130 if (offset_samples < 0 && -offset_samples > static_cast<int>(

	131 last_turn.end - last_turn.begin)) {

	132 LOG(LS_ERROR) << "invalid order";

	133 return false;

	134 }

	135

	136 // Cross-talk with 3 or more speakers occurs when the beginning of the

	137 // current interval falls in the last two turns.

	138 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)

	139 && in_interval(begin_timestamp, second_last_turn)) {

	140 LOG(LS_ERROR) << "cross-talk with 3+ speakers";

	141 return false;

	142 }

	143

	144 // Append turn.

	145 speaking_turns_.emplace_back(

	146 turn.speaker_name, turn.audiotrack_file_name,

	147 begin_timestamp, end_timestamp);

	148

	149 // Save speaking turn index for self cross-talk detection.

	150 RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);

	151 speaking_turn_indices[turn.speaker_name].push_back(turn_index);

	152

	153 // Update total duration of the conversational speech.

	154 if (total_duration_samples_ < end_timestamp)

	155 total_duration_samples_ = end_timestamp;

	156

	157 // Update and continue with next turn.

	158 second_last_turn = last_turn;

	159 last_turn.begin = begin_timestamp;

	160 last_turn.end = end_timestamp;

	161 }

	162

	163 // Detect self cross-talk.

	164 for (const std::string& speaker_name : speaker_names_) {

	165 LOG(LS_INFO) << "checking self cross-talk for <"

	166 << speaker_name << ">";

	167

	168 // Copy all turns for this speaker to new vector.

	169 std::vector<SpeakingTurn> speaking_turns_for_name;

	170 std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),

	171 std::back_inserter(speaking_turns_for_name),

	172 [&speaker_name](const SpeakingTurn& st){

	173 return st.speaker_name == speaker_name; });

	174

	175 // Check for overlap between adjacent elements.

	176 // This is a sufficient condition for self cross-talk since the intervals

	177 // are sorted by begin timestamp.

	178 auto overlap = std::adjacent_find(

	179 speaking_turns_for_name.begin(), speaking_turns_for_name.end(),

	180 [](const SpeakingTurn& a, const SpeakingTurn& b) {

	181 return a.end > b.begin; });

	182

	183 if (overlap != speaking_turns_for_name.end()) {

	184 LOG(LS_ERROR) << "Self cross-talk detected";

	185 return false;

	186 }

	187 }

	188

	189 return true;

71 }	190 }

72	191

73 } // namespace conversational_speech	192 } // namespace conversational_speech

74 } // namespace test	193 } // namespace test

75 } // namespace webrtc	194 } // namespace webrtc

OLD	NEW