OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" | 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" |
12 | 12 |
13 #include <assert.h> | 13 #include <assert.h> |
14 | 14 |
| 15 #include "webrtc/base/criticalsection.h" |
| 16 #include "webrtc/base/thread_checker.h" |
15 #include "webrtc/common_audio/vad/include/webrtc_vad.h" | 17 #include "webrtc/common_audio/vad/include/webrtc_vad.h" |
16 #include "webrtc/modules/audio_processing/audio_buffer.h" | 18 #include "webrtc/modules/audio_processing/audio_buffer.h" |
17 #include "webrtc/system_wrappers/include/critical_section_wrapper.h" | |
18 | 19 |
19 namespace webrtc { | 20 namespace webrtc { |
20 | 21 |
21 typedef VadInst Handle; | 22 typedef VadInst Handle; |
22 | 23 |
23 namespace { | 24 namespace { |
24 int MapSetting(VoiceDetection::Likelihood likelihood) { | 25 int MapSetting(VoiceDetection::Likelihood likelihood) { |
25 switch (likelihood) { | 26 switch (likelihood) { |
26 case VoiceDetection::kVeryLowLikelihood: | 27 case VoiceDetection::kVeryLowLikelihood: |
27 return 3; | 28 return 3; |
28 case VoiceDetection::kLowLikelihood: | 29 case VoiceDetection::kLowLikelihood: |
29 return 2; | 30 return 2; |
30 case VoiceDetection::kModerateLikelihood: | 31 case VoiceDetection::kModerateLikelihood: |
31 return 1; | 32 return 1; |
32 case VoiceDetection::kHighLikelihood: | 33 case VoiceDetection::kHighLikelihood: |
33 return 0; | 34 return 0; |
34 } | 35 } |
35 assert(false); | 36 assert(false); |
36 return -1; | 37 return -1; |
37 } | 38 } |
38 } // namespace | 39 } // namespace |
39 | 40 |
40 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, | 41 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, |
41 CriticalSectionWrapper* crit) | 42 rtc::CriticalSection* crit) |
42 : ProcessingComponent(), | 43 : ProcessingComponent(), |
43 apm_(apm), | 44 apm_(apm), |
44 crit_(crit), | 45 crit_(crit), |
45 stream_has_voice_(false), | 46 stream_has_voice_(false), |
46 using_external_vad_(false), | 47 using_external_vad_(false), |
47 likelihood_(kLowLikelihood), | 48 likelihood_(kLowLikelihood), |
48 frame_size_ms_(10), | 49 frame_size_ms_(10), |
49 frame_size_samples_(0) {} | 50 frame_size_samples_(0) { |
| 51 RTC_DCHECK(apm); |
| 52 RTC_DCHECK(crit); |
| 53 } |
50 | 54 |
51 VoiceDetectionImpl::~VoiceDetectionImpl() {} | 55 VoiceDetectionImpl::~VoiceDetectionImpl() {} |
52 | 56 |
53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { | 57 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { |
| 58 rtc::CritScope cs(crit_); |
54 if (!is_component_enabled()) { | 59 if (!is_component_enabled()) { |
55 return apm_->kNoError; | 60 return apm_->kNoError; |
56 } | 61 } |
57 | 62 |
58 if (using_external_vad_) { | 63 if (using_external_vad_) { |
59 using_external_vad_ = false; | 64 using_external_vad_ = false; |
60 return apm_->kNoError; | 65 return apm_->kNoError; |
61 } | 66 } |
62 assert(audio->num_frames_per_band() <= 160); | 67 assert(audio->num_frames_per_band() <= 160); |
63 | 68 |
(...skipping 10 matching lines...) |
74 stream_has_voice_ = true; | 79 stream_has_voice_ = true; |
75 audio->set_activity(AudioFrame::kVadActive); | 80 audio->set_activity(AudioFrame::kVadActive); |
76 } else { | 81 } else { |
77 return apm_->kUnspecifiedError; | 82 return apm_->kUnspecifiedError; |
78 } | 83 } |
79 | 84 |
80 return apm_->kNoError; | 85 return apm_->kNoError; |
81 } | 86 } |
82 | 87 |
83 int VoiceDetectionImpl::Enable(bool enable) { | 88 int VoiceDetectionImpl::Enable(bool enable) { |
84 CriticalSectionScoped crit_scoped(crit_); | 89 rtc::CritScope cs(crit_); |
85 return EnableComponent(enable); | 90 return EnableComponent(enable); |
86 } | 91 } |
87 | 92 |
88 bool VoiceDetectionImpl::is_enabled() const { | 93 bool VoiceDetectionImpl::is_enabled() const { |
| 94 rtc::CritScope cs(crit_); |
89 return is_component_enabled(); | 95 return is_component_enabled(); |
90 } | 96 } |
91 | 97 |
92 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { | 98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { |
| 99 rtc::CritScope cs(crit_); |
93 using_external_vad_ = true; | 100 using_external_vad_ = true; |
94 stream_has_voice_ = has_voice; | 101 stream_has_voice_ = has_voice; |
95 return apm_->kNoError; | 102 return apm_->kNoError; |
96 } | 103 } |
97 | 104 |
98 bool VoiceDetectionImpl::stream_has_voice() const { | 105 bool VoiceDetectionImpl::stream_has_voice() const { |
| 106 rtc::CritScope cs(crit_); |
99 // TODO(ajm): enable this assertion? | 107 // TODO(ajm): enable this assertion? |
100 //assert(using_external_vad_ || is_component_enabled()); | 108 //assert(using_external_vad_ || is_component_enabled()); |
101 return stream_has_voice_; | 109 return stream_has_voice_; |
102 } | 110 } |
103 | 111 |
104 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { | 112 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { |
105 CriticalSectionScoped crit_scoped(crit_); | 113 rtc::CritScope cs(crit_); |
106 if (MapSetting(likelihood) == -1) { | 114 if (MapSetting(likelihood) == -1) { |
107 return apm_->kBadParameterError; | 115 return apm_->kBadParameterError; |
108 } | 116 } |
109 | 117 |
110 likelihood_ = likelihood; | 118 likelihood_ = likelihood; |
111 return Configure(); | 119 return Configure(); |
112 } | 120 } |
113 | 121 |
114 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { | 122 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { |
| 123 rtc::CritScope cs(crit_); |
115 return likelihood_; | 124 return likelihood_; |
116 } | 125 } |
117 | 126 |
118 int VoiceDetectionImpl::set_frame_size_ms(int size) { | 127 int VoiceDetectionImpl::set_frame_size_ms(int size) { |
119 CriticalSectionScoped crit_scoped(crit_); | 128 rtc::CritScope cs(crit_); |
120 assert(size == 10); // TODO(ajm): remove when supported. | 129 assert(size == 10); // TODO(ajm): remove when supported. |
121 if (size != 10 && | 130 if (size != 10 && |
122 size != 20 && | 131 size != 20 && |
123 size != 30) { | 132 size != 30) { |
124 return apm_->kBadParameterError; | 133 return apm_->kBadParameterError; |
125 } | 134 } |
126 | 135 |
127 frame_size_ms_ = size; | 136 frame_size_ms_ = size; |
128 | 137 |
129 return Initialize(); | 138 return Initialize(); |
130 } | 139 } |
131 | 140 |
132 int VoiceDetectionImpl::frame_size_ms() const { | 141 int VoiceDetectionImpl::frame_size_ms() const { |
| 142 rtc::CritScope cs(crit_); |
133 return frame_size_ms_; | 143 return frame_size_ms_; |
134 } | 144 } |
135 | 145 |
136 int VoiceDetectionImpl::Initialize() { | 146 int VoiceDetectionImpl::Initialize() { |
137 int err = ProcessingComponent::Initialize(); | 147 int err = ProcessingComponent::Initialize(); |
| 148 |
| 149 rtc::CritScope cs(crit_); |
138 if (err != apm_->kNoError || !is_component_enabled()) { | 150 if (err != apm_->kNoError || !is_component_enabled()) { |
139 return err; | 151 return err; |
140 } | 152 } |
141 | 153 |
142 using_external_vad_ = false; | 154 using_external_vad_ = false; |
143 frame_size_samples_ = static_cast<size_t>( | 155 frame_size_samples_ = static_cast<size_t>( |
144 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); | 156 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); |
145 // TODO(ajm): initialize frame buffer here. | 157 // TODO(ajm): initialize frame buffer here. |
146 | 158 |
147 return apm_->kNoError; | 159 return apm_->kNoError; |
148 } | 160 } |
149 | 161 |
150 void* VoiceDetectionImpl::CreateHandle() const { | 162 void* VoiceDetectionImpl::CreateHandle() const { |
151 return WebRtcVad_Create(); | 163 return WebRtcVad_Create(); |
152 } | 164 } |
153 | 165 |
154 void VoiceDetectionImpl::DestroyHandle(void* handle) const { | 166 void VoiceDetectionImpl::DestroyHandle(void* handle) const { |
155 WebRtcVad_Free(static_cast<Handle*>(handle)); | 167 WebRtcVad_Free(static_cast<Handle*>(handle)); |
156 } | 168 } |
157 | 169 |
158 int VoiceDetectionImpl::InitializeHandle(void* handle) const { | 170 int VoiceDetectionImpl::InitializeHandle(void* handle) const { |
159 return WebRtcVad_Init(static_cast<Handle*>(handle)); | 171 return WebRtcVad_Init(static_cast<Handle*>(handle)); |
160 } | 172 } |
161 | 173 |
162 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { | 174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { |
| 175 rtc::CritScope cs(crit_); |
163 return WebRtcVad_set_mode(static_cast<Handle*>(handle), | 176 return WebRtcVad_set_mode(static_cast<Handle*>(handle), |
164 MapSetting(likelihood_)); | 177 MapSetting(likelihood_)); |
165 } | 178 } |
166 | 179 |
167 int VoiceDetectionImpl::num_handles_required() const { | 180 int VoiceDetectionImpl::num_handles_required() const { |
168 return 1; | 181 return 1; |
169 } | 182 } |
170 | 183 |
171 int VoiceDetectionImpl::GetHandleError(void* handle) const { | 184 int VoiceDetectionImpl::GetHandleError(void* handle) const { |
172 // The VAD has no get_error() function. | 185 // The VAD has no get_error() function. |
173 assert(handle != NULL); | 186 assert(handle != NULL); |
174 return apm_->kUnspecifiedError; | 187 return apm_->kUnspecifiedError; |
175 } | 188 } |
176 } // namespace webrtc | 189 } // namespace webrtc |
OLD | NEW |