webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc - Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests

Side by Side Diff: webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc

Issue 2781573002: Conversational Speech tool, MultiEndCall::CheckTiming() and tests (Closed)

Patch Set: missing include to get std::back_inserter working on win targets Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"	11 #include "webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h"

12	12

13 #include <utility>	13 #include <algorithm>

	14 #include <iterator>

14	15

	16 #include "webrtc/base/logging.h"

15 #include "webrtc/base/pathutils.h"	17 #include "webrtc/base/pathutils.h"

16	18

17 namespace webrtc {	19 namespace webrtc {

18 namespace test {	20 namespace test {

19 namespace conversational_speech {	21 namespace conversational_speech {

20	22

21 MultiEndCall::MultiEndCall(	23 MultiEndCall::MultiEndCall(

22 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,	24 rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,

23 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)	25 std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)

24 : timing_(timing), audiotracks_path_(audiotracks_path),	26 : timing_(timing), audiotracks_path_(audiotracks_path),

25 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {	27 wavreader_abstract_factory_(std::move(wavreader_abstract_factory)) {

26 FindSpeakerNames();	28 FindSpeakerNames();

27 CreateAudioTrackReaders();	29 CreateAudioTrackReaders();

28 CheckTiming();	30 valid_ = CheckTiming();

29 }	31 }

30	32

31 MultiEndCall::~MultiEndCall() = default;	33 MultiEndCall::~MultiEndCall() = default;

32	34

33 const std::set<std::string>& MultiEndCall::speaker_names() const {	35 const std::set<std::string>& MultiEndCall::speaker_names() const {

34 return speaker_names_;	36 return speaker_names_;

35 }	37 }

36	38

37 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&	39 const std::map<std::string, std::unique_ptr<WavReaderInterface>>&

38 MultiEndCall::audiotrack_readers() const {	40 MultiEndCall::audiotrack_readers() const {

39 return audiotrack_readers_;	41 return audiotrack_readers_;

40 }	42 }

41	43

	44 bool MultiEndCall::valid() const {

	45 return valid_;

	46 }

	47

	48 size_t MultiEndCall::total_duration_samples() const {

	49 return total_duration_samples_;

	50 }

	51

	52 const std::vector<MultiEndCall::SpeakingTurn>& MultiEndCall::speaking_turns()

	53 const {

	54 return speaking_turns_;

	55 }

	56

42 void MultiEndCall::FindSpeakerNames() {	57 void MultiEndCall::FindSpeakerNames() {

43 RTC_DCHECK(speaker_names_.empty());	58 RTC_DCHECK(speaker_names_.empty());

44 for (const Turn& turn : timing_) {	59 for (const Turn& turn : timing_) {

45 speaker_names_.insert(turn.speaker_name);	60 speaker_names_.emplace(turn.speaker_name);

46 }	61 }

47 }	62 }

48	63

49 void MultiEndCall::CreateAudioTrackReaders() {	64 void MultiEndCall::CreateAudioTrackReaders() {

50 RTC_DCHECK(audiotrack_readers_.empty());	65 RTC_DCHECK(audiotrack_readers_.empty());

51 for (const Turn& turn : timing_) {	66 for (const Turn& turn : timing_) {

52 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);	67 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

53 if (it != audiotrack_readers_.end())	68 if (it != audiotrack_readers_.end())

54 continue;	69 continue;

55	70

56 // Instance Pathname to retrieve the full path to the audiotrack file.	71 // Instance Pathname to retrieve the full path to the audiotrack file.

57 const rtc::Pathname audiotrack_file_path(	72 const rtc::Pathname audiotrack_file_path(

58 audiotracks_path_, turn.audiotrack_file_name);	73 audiotracks_path_, turn.audiotrack_file_name);

59	74

60 // Map the audiotrack file name to a new instance of WavReaderInterface.	75 // Map the audiotrack file name to a new instance of WavReaderInterface.

61 std::unique_ptr<WavReaderInterface> wavreader =	76 std::unique_ptr<WavReaderInterface> wavreader =

62 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());	77 wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());

63 audiotrack_readers_.insert(std::make_pair(	78 audiotrack_readers_.emplace(

64 turn.audiotrack_file_name, std::move(wavreader)));	79 turn.audiotrack_file_name, std::move(wavreader));

65 }	80 }

66 }	81 }

67	82

68 void MultiEndCall::CheckTiming() {	83 bool MultiEndCall::CheckTiming() {

69 // TODO(alessiob): use audiotrack lengths and offset to check whether the	84 struct Interval {

70 // timing is valid.	85 size_t begin;

	86 size_t end;

	87 };

	88 size_t number_of_turns = timing_.size();

	89 auto millisecond_to_samples = [](int ms, int sr) -> int {

	90 // Truncation may happen if the sampling rate is not an integer multiple

	91 // of 1000 (e.g., 44100).

	92 return ms * sr / 1000;

	93 };

	94 auto in_interval = [](size_t value, const Interval& interval) {

	95 return interval.begin <= value && value < interval.end;

	96 };

	97 total_duration_samples_ = 0;

	98 speaking_turns_.clear();

	99

	100 // Begin and end timestamps for the last two turns (unit: number of samples).

	101 Interval second_last_turn = {0, 0};

	102 Interval last_turn = {0, 0};

	103

	104 // Initialize map to store speaking turn indices of each speaker (used to

	105 // detect self cross-talk).

	106 std::map<std::string, std::vector<size_t>> speaking_turn_indices;

	107 for (const std::string& speaker_name : speaker_names_) {

	108 speaking_turn_indices.emplace(

	109 std::piecewise_construct,

	110 std::forward_as_tuple(speaker_name),

	111 std::forward_as_tuple());

	112 }

	113

	114 // Parse turns.

	115 for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {

	116 const Turn& turn = timing_[turn_index];

	117 auto it = audiotrack_readers_.find(turn.audiotrack_file_name);

	118 RTC_CHECK(it != audiotrack_readers_.end())

	119 << "Audio track reader not created";

	120

	121 // Begin and end timestamps for the current turn.

	122 int offset_samples = millisecond_to_samples(

	123 turn.offset, it->second->sample_rate());

	124 size_t begin_timestamp = last_turn.end + offset_samples;

	125 size_t end_timestamp = begin_timestamp + it->second->num_samples();

	126 LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp

	127 << "-" << end_timestamp << " ms";

	128

	129 // The order is invalid if the offset is negative and its absolute value is

	130 // larger than the duration of the previous turn.

	131 if (offset_samples < 0 && -offset_samples > static_cast<int>(

	132 last_turn.end - last_turn.begin)) {

	133 LOG(LS_ERROR) << "invalid order";

	134 return false;

	135 }

	136

	137 // Cross-talk with 3 or more speakers occurs when the beginning of the

	138 // current interval falls in the last two turns.

	139 if (turn_index > 1 && in_interval(begin_timestamp, last_turn)

	140 && in_interval(begin_timestamp, second_last_turn)) {

	141 LOG(LS_ERROR) << "cross-talk with 3+ speakers";

	142 return false;

	143 }

	144

	145 // Append turn.

	146 speaking_turns_.emplace_back(

	147 turn.speaker_name, turn.audiotrack_file_name,

	148 begin_timestamp, end_timestamp);

	149

	150 // Save speaking turn index for self cross-talk detection.

	151 RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);

	152 speaking_turn_indices[turn.speaker_name].push_back(turn_index);

	153

	154 // Update total duration of the conversational speech.

	155 if (total_duration_samples_ < end_timestamp)

	156 total_duration_samples_ = end_timestamp;

	157

	158 // Update and continue with next turn.

	159 second_last_turn = last_turn;

	160 last_turn.begin = begin_timestamp;

	161 last_turn.end = end_timestamp;

	162 }

	163

	164 // Detect self cross-talk.

	165 for (const std::string& speaker_name : speaker_names_) {

	166 LOG(LS_INFO) << "checking self cross-talk for <"

	167 << speaker_name << ">";

	168

	169 // Copy all turns for this speaker to new vector.

	170 std::vector<SpeakingTurn> speaking_turns_for_name;

	171 std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),

	172 std::back_inserter(speaking_turns_for_name),

	173 [&speaker_name](const SpeakingTurn& st){

	174 return st.speaker_name == speaker_name; });

	175

	176 // Check for overlap between adjacent elements.

	177 // This is a sufficient condition for self cross-talk since the intervals

	178 // are sorted by begin timestamp.

	179 auto overlap = std::adjacent_find(

	180 speaking_turns_for_name.begin(), speaking_turns_for_name.end(),

	181 [](const SpeakingTurn& a, const SpeakingTurn& b) {

	182 return a.end > b.begin; });

	183

	184 if (overlap != speaking_turns_for_name.end()) {

	185 LOG(LS_ERROR) << "Self cross-talk detected";

	186 return false;

	187 }

	188 }

	189

	190 return true;

71 }	191 }

72	192

73 } // namespace conversational_speech	193 } // namespace conversational_speech

74 } // namespace test	194 } // namespace test

75 } // namespace webrtc	195 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h ('k') | no next file » | no next file with comments »