OLD | NEW |
| (Empty) |
1 /* | |
2 * libjingle | |
3 * Copyright 2011 Google Inc. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without | |
6 * modification, are permitted provided that the following conditions are met: | |
7 * | |
8 * 1. Redistributions of source code must retain the above copyright notice, | |
9 * this list of conditions and the following disclaimer. | |
10 * 2. Redistributions in binary form must reproduce the above copyright notice, | |
11 * this list of conditions and the following disclaimer in the documentation | |
12 * and/or other materials provided with the distribution. | |
13 * 3. The name of the author may not be used to endorse or promote products | |
14 * derived from this software without specific prior written permission. | |
15 * | |
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO | |
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 */ | |
27 | |
28 #include "talk/session/media/currentspeakermonitor.h" | |
29 | |
30 #include "talk/session/media/audiomonitor.h" | |
31 #include "webrtc/base/logging.h" | |
32 #include "webrtc/media/base/streamparams.h" | |
33 | |
34 namespace cricket { | |
35 | |
namespace {

// Default hold-off applied after a speaker switch so the current speaker does
// not flip back and forth. Added to the value of rtc::Time(), so presumably
// expressed in milliseconds -- TODO confirm against rtc::Time().
const int kDefaultMinTimeBetweenSwitches = 1000;

// Level credited to the current speaker while they are briefly silent, so a
// short pause does not hand the floor to somebody else.
const int kMaxAudioLevel = 9;

}  // namespace
42 | |
43 CurrentSpeakerMonitor::CurrentSpeakerMonitor( | |
44 AudioSourceContext* audio_source_context) | |
45 : started_(false), | |
46 audio_source_context_(audio_source_context), | |
47 current_speaker_ssrc_(0), | |
48 earliest_permitted_switch_time_(0), | |
49 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {} | |
50 | |
51 CurrentSpeakerMonitor::~CurrentSpeakerMonitor() { | |
52 Stop(); | |
53 } | |
54 | |
55 void CurrentSpeakerMonitor::Start() { | |
56 if (!started_) { | |
57 audio_source_context_->SignalAudioMonitor.connect( | |
58 this, &CurrentSpeakerMonitor::OnAudioMonitor); | |
59 audio_source_context_->SignalMediaStreamsUpdate.connect( | |
60 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate); | |
61 audio_source_context_->SignalMediaStreamsReset.connect( | |
62 this, &CurrentSpeakerMonitor::OnMediaStreamsReset); | |
63 | |
64 started_ = true; | |
65 } | |
66 } | |
67 | |
68 void CurrentSpeakerMonitor::Stop() { | |
69 if (started_) { | |
70 audio_source_context_->SignalAudioMonitor.disconnect(this); | |
71 audio_source_context_->SignalMediaStreamsUpdate.disconnect(this); | |
72 | |
73 started_ = false; | |
74 ssrc_to_speaking_state_map_.clear(); | |
75 current_speaker_ssrc_ = 0; | |
76 earliest_permitted_switch_time_ = 0; | |
77 } | |
78 } | |
79 | |
80 void CurrentSpeakerMonitor::set_min_time_between_switches( | |
81 uint32_t min_time_between_switches) { | |
82 min_time_between_switches_ = min_time_between_switches; | |
83 } | |
84 | |
85 void CurrentSpeakerMonitor::OnAudioMonitor( | |
86 AudioSourceContext* audio_source_context, const AudioInfo& info) { | |
87 std::map<uint32_t, int> active_ssrc_to_level_map; | |
88 cricket::AudioInfo::StreamList::const_iterator stream_list_it; | |
89 for (stream_list_it = info.active_streams.begin(); | |
90 stream_list_it != info.active_streams.end(); ++stream_list_it) { | |
91 uint32_t ssrc = stream_list_it->first; | |
92 active_ssrc_to_level_map[ssrc] = stream_list_it->second; | |
93 | |
94 // It's possible we haven't yet added this source to our map. If so, | |
95 // add it now with a "not speaking" state. | |
96 if (ssrc_to_speaking_state_map_.find(ssrc) == | |
97 ssrc_to_speaking_state_map_.end()) { | |
98 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING; | |
99 } | |
100 } | |
101 | |
102 int max_level = 0; | |
103 uint32_t loudest_speaker_ssrc = 0; | |
104 | |
105 // Update the speaking states of all participants based on the new audio | |
106 // level information. Also retain loudest speaker. | |
107 std::map<uint32_t, SpeakingState>::iterator state_it; | |
108 for (state_it = ssrc_to_speaking_state_map_.begin(); | |
109 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) { | |
110 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first; | |
111 | |
112 // This uses a state machine in order to gradually identify | |
113 // members as having started or stopped speaking. Matches the | |
114 // algorithm used by the hangouts js code. | |
115 | |
116 std::map<uint32_t, int>::const_iterator level_it = | |
117 active_ssrc_to_level_map.find(state_it->first); | |
118 // Note that the stream map only contains streams with non-zero audio | |
119 // levels. | |
120 int level = (level_it != active_ssrc_to_level_map.end()) ? | |
121 level_it->second : 0; | |
122 switch (state_it->second) { | |
123 case SS_NOT_SPEAKING: | |
124 if (level > 0) { | |
125 // Reset level because we don't think they're really speaking. | |
126 level = 0; | |
127 state_it->second = SS_MIGHT_BE_SPEAKING; | |
128 } else { | |
129 // State unchanged. | |
130 } | |
131 break; | |
132 case SS_MIGHT_BE_SPEAKING: | |
133 if (level > 0) { | |
134 state_it->second = SS_SPEAKING; | |
135 } else { | |
136 state_it->second = SS_NOT_SPEAKING; | |
137 } | |
138 break; | |
139 case SS_SPEAKING: | |
140 if (level > 0) { | |
141 // State unchanged. | |
142 } else { | |
143 state_it->second = SS_WAS_SPEAKING_RECENTLY1; | |
144 if (is_previous_speaker) { | |
145 // Assume this is an inter-word silence and assign him the highest | |
146 // volume. | |
147 level = kMaxAudioLevel; | |
148 } | |
149 } | |
150 break; | |
151 case SS_WAS_SPEAKING_RECENTLY1: | |
152 if (level > 0) { | |
153 state_it->second = SS_SPEAKING; | |
154 } else { | |
155 state_it->second = SS_WAS_SPEAKING_RECENTLY2; | |
156 if (is_previous_speaker) { | |
157 // Assume this is an inter-word silence and assign him the highest | |
158 // volume. | |
159 level = kMaxAudioLevel; | |
160 } | |
161 } | |
162 break; | |
163 case SS_WAS_SPEAKING_RECENTLY2: | |
164 if (level > 0) { | |
165 state_it->second = SS_SPEAKING; | |
166 } else { | |
167 state_it->second = SS_NOT_SPEAKING; | |
168 } | |
169 break; | |
170 } | |
171 | |
172 if (level > max_level) { | |
173 loudest_speaker_ssrc = state_it->first; | |
174 max_level = level; | |
175 } else if (level > 0 && level == max_level && is_previous_speaker) { | |
176 // Favor continuity of loudest speakers if audio levels are equal. | |
177 loudest_speaker_ssrc = state_it->first; | |
178 } | |
179 } | |
180 | |
181 // We avoid over-switching by disabling switching for a period of time after | |
182 // a switch is done. | |
183 uint32_t now = rtc::Time(); | |
184 if (earliest_permitted_switch_time_ <= now && | |
185 current_speaker_ssrc_ != loudest_speaker_ssrc) { | |
186 current_speaker_ssrc_ = loudest_speaker_ssrc; | |
187 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_; | |
188 earliest_permitted_switch_time_ = now + min_time_between_switches_; | |
189 SignalUpdate(this, current_speaker_ssrc_); | |
190 } | |
191 } | |
192 | |
193 void CurrentSpeakerMonitor::OnMediaStreamsUpdate( | |
194 AudioSourceContext* audio_source_context, | |
195 const MediaStreams& added, | |
196 const MediaStreams& removed) { | |
197 if (audio_source_context == audio_source_context_) { | |
198 // Update the speaking state map based on added and removed streams. | |
199 for (std::vector<cricket::StreamParams>::const_iterator | |
200 it = removed.audio().begin(); it != removed.audio().end(); ++it) { | |
201 ssrc_to_speaking_state_map_.erase(it->first_ssrc()); | |
202 } | |
203 | |
204 for (std::vector<cricket::StreamParams>::const_iterator | |
205 it = added.audio().begin(); it != added.audio().end(); ++it) { | |
206 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING; | |
207 } | |
208 } | |
209 } | |
210 | |
211 void CurrentSpeakerMonitor::OnMediaStreamsReset( | |
212 AudioSourceContext* audio_source_context) { | |
213 if (audio_source_context == audio_source_context_) { | |
214 ssrc_to_speaking_state_map_.clear(); | |
215 } | |
216 } | |
217 | |
218 } // namespace cricket | |
OLD | NEW |