webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc - Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)

Patch Set: rebase Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc ('K') | « webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

12	12

13 #include <utility>	13 #include "webrtc/base/logging.h"

14

15 #include "webrtc/base/pathutils.h"	14 #include "webrtc/base/pathutils.h"

16	15

17 namespace webrtc {	16 namespace webrtc {

18 namespace test {	17 namespace test {

19 namespace conversational_speech {	18 namespace conversational_speech {

20	19

21 MultiEndCall::MultiEndCall(	20 MultiEndCall::MultiEndCall(

22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,	21 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,

23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)	22 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)

24 : timing_(timing), audiotracks_path_(audiotracks_path),	23 : timing_(timing), audiotracks_path_(audiotracks_path),

25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {	24 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {

26 FindSpeakerNames();	25 FindSpeakerNames();

27 CreateAudioTrackReaders();	26 CreateAudioTrackReaders();

28 CheckTiming();	27 valid_ = CheckTiming();

29 }	28 }

30	29

31 MultiEndCall::~MultiEndCall() = default;	30 MultiEndCall::~MultiEndCall() = default;

32	31

33 const std::set<std::string>& MultiEndCall::speaker_names() const {	32 const std::set<std::string>& MultiEndCall::speaker_names() const {

34 return speaker_names_;	33 return speaker_names_;

35 }	34 }

36	35

37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&	36 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&

38 MultiEndCall::audiotrack_readers() const {	37 MultiEndCall::audiotrack_readers() const {

39 return audiotrack_readers_;	38 return audiotrack_readers_;

40 }	39 }

41	40

	41 bool MultiEndCall::valid() {

	42 return valid_;

	43 }

	44

42 void MultiEndCall::FindSpeakerNames() {	45 void MultiEndCall::FindSpeakerNames() {

43 RTC_DCHECK(speaker_names_.empty());	46 RTC_DCHECK(speaker_names_.empty());

44 for (const Turn& turn : timing_) {	47 for (const Turn& turn : timing_) {

45 speaker_names_.insert(turn.speaker_name);	48 speaker_names_.insert(turn.speaker_name);

46 }	49 }

47 }	50 }

48	51

49 void MultiEndCall::CreateAudioTrackReaders() {	52 void MultiEndCall::CreateAudioTrackReaders() {

50 RTC_DCHECK(audiotrack_readers_.empty());	53 RTC_DCHECK(audiotrack_readers_.empty());

51 for (const Turn& turn : timing_) {	54 for (const Turn& turn : timing_) {

52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);	55 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

53 if (it != audiotrack_readers_.end())	56 if (it != audiotrack_readers_.end())

54 continue;	57 continue;

55	58

56 // Instance Pathname to retrieve the full path to the audiotrack file.	59 // Instance Pathname to retrieve the full path to the audiotrack file.

57 const rtc::Pathname audiotrack_file_path(	60 const rtc::Pathname audiotrack_file_path(

58 audiotracks_path_, turn.audiotrack_file_name);	61 audiotracks_path_, turn.audiotrack_file_name);

59	62

60 // Map the audiotrack file name to a new instance of WavReaderInterface.	63 // Map the audiotrack file name to a new instance of WavReaderInterface.

61 std::unique_ptr<WavReaderInterface> wavreader =	64 std::unique_ptr<WavReaderInterface> wavreader =

62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());	65 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());

63 audiotrack_readers_.insert(std::make_pair(	66 audiotrack_readers_.insert(std::make_pair(

64 turn.audiotrack_file_name, std::move(wavreader)));	67 turn.audiotrack_file_name, std::move(wavreader)));

65 }	68 }

66 }	69 }

67	70

68 void MultiEndCall::CheckTiming() {	71 bool MultiEndCall::CheckTiming() const {

69 // TODO(alessiob): use audiotrack lengths and offset to check whether the	72 std::size_t number_of_turns = timing_.size();

70 // timing is valid.	73 auto millisecond_to_samples = [](int ms, int sr) -> int {

	74 return ms * sr / 1000;

	75 };

	76 auto in_interval = [](std::size_t value, const Interval& interval) {

	77 return interval.first <= value && value < interval.second;

	78 };

	79

	80 // Begin and end timestamps for the last two turns (unit: number of samples).

	81 Interval second_last_turn = {0, 0};

	82 Interval last_turn = {0, 0};

	83

	84 // Initialize map to store turn intervals of each speaker (used to detect self

	85 // cross-talk).

	86 std::map<std::string, std::unique_ptr<IntervalsVector>> speakers_intervals;

	87 for (const std::string& speaker_name : speaker_names_) {

	88 // Initialize a vector.

	89 speakers_intervals.insert(std::make_pair(

	90 speaker_name, std::unique_ptr<IntervalsVector>(

	91 new IntervalsVector())));

	92 LOG(LS_VERBOSE) << "speaker_intervals vector for <" << speaker_name

	93 << "> preallocated (capacity: "

	94 << speakers_intervals[speaker_name]->capacity() << ")";

	95 }

	96

	97 // Parse turns.

	98 for (std::size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {

	99 const Turn& turn = timing_[turn_index];

	100 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

	101 RTC_CHECK(it != audiotrack_readers_.end())

	102 << "Audio track reader not created";

	103

	104 // Begin and end timestamps for the current turn.

	105 int offset_samples = millisecond_to_samples(

	106 turn.offset, it->second->sample_rate());

	107 std::size_t begin_timestamp = last_turn.second + offset_samples;

	108 std::size_t end_timestamp = begin_timestamp + it->second->num_samples();

	109 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp

	110 << "-" << end_timestamp << " ms";

	111

	112 // The order is invalid if the offset is negative and its absolute value is

	113 // larger than the duration of the previous turn.

	114 if (offset_samples < 0 && -offset_samples > int(

	115 last_turn.second - last_turn.first)) {

	116 LOG(LS_ERROR) << "invalid order";

	117 return false;

	118 }

	119

	120 // Cross-talk with 3 or more speakers occurs when the beginning of the

	121 // current interval falls in the last two turns.

	122 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)

	123 && in_interval(begin_timestamp, second_last_turn)) {

	124 LOG(LS_ERROR) << "cross-talk with 3+ speakers";

	125 return false;

	126 }

	127

	128 // Save speaker turn interval.

	129 Interval current_turn = {begin_timestamp, end_timestamp};

	130 speakers_intervals[turn.speaker_name]->push_back(current_turn);

	131

	132 // Update and continue with next turn.

	133 second_last_turn = last_turn;

	134 last_turn = current_turn;

	135 }

	136

	137 // Detect self cross-talk.

	138 for (const std::string& speaker_name : speaker_names_) {

	139 LOG(LS_INFO) << "checking self cross-talk for <"

	140 << speaker_name << ">";

	141 if (DetectSelfCrossTalk(speakers_intervals[speaker_name].get())) {

	142 LOG(LS_ERROR) << "Self cross-talk detected";

	143 return false;

	144 }

	145 }

	146

	147 return true;

	148 }

	149

	150 bool MultiEndCall::DetectSelfCrossTalk(IntervalsVector* speaker_intervals)

	151 const {

	152 Interval previous_interval = speaker_intervals->at(0);

	153 LOG(LS_VERBOSE) << "#0" << ": " << previous_interval.first << " "

	154 << previous_interval.second;

	155 for (std::size_t index = 1; index < speaker_intervals->size(); ++index) {

	156 auto interval = speaker_intervals->at(index);

	157 LOG(LS_VERBOSE) << "#" << index << ": " << interval.first << " "

	158 << interval.second;

	159

	160 // Check if there is overlap with the previous interval.

	161 // This is a sufficient condition for self cross-talk since the intervals

	162 // are sorted by begin timestamp.

	163 if (previous_interval.second > interval.first) {

	164 return true;

	165 }

	166

	167 // Update and continue with next turn.

	168 previous_interval = interval;

	169 }

	170 return false;

71 }	171 }

72	172

73 } // namespace conversational_speech	173 } // namespace conversational_speech

74 } // namespace test	174 } // namespace test

75 } // namespace webrtc	175 } // namespace webrtc

OLD	NEW