OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" | 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" |
12 | 12 |
13 #include <assert.h> | |
14 | |
15 #include "webrtc/base/criticalsection.h" | |
16 #include "webrtc/base/thread_checker.h" | |
17 #include "webrtc/common_audio/vad/include/webrtc_vad.h" | 13 #include "webrtc/common_audio/vad/include/webrtc_vad.h" |
18 #include "webrtc/modules/audio_processing/audio_buffer.h" | 14 #include "webrtc/modules/audio_processing/audio_buffer.h" |
19 | 15 |
20 namespace webrtc { | 16 namespace webrtc { |
| 17 class VoiceDetectionImpl::Vad { |
| 18 public: |
| 19 Vad() { |
| 20 state_ = WebRtcVad_Create(); |
| 21 RTC_CHECK(state_); |
| 22 int error = WebRtcVad_Init(state_); |
| 23 RTC_DCHECK_EQ(0, error); |
| 24 } |
| 25 ~Vad() { |
| 26 WebRtcVad_Free(state_); |
| 27 } |
| 28 VadInst* state() { return state_; } |
| 29 private: |
| 30 VadInst* state_ = nullptr; |
| 31 RTC_DISALLOW_COPY_AND_ASSIGN(Vad); |
| 32 }; |
21 | 33 |
22 typedef VadInst Handle; | 34 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) |
23 | 35 : crit_(crit) { |
24 namespace { | |
25 int MapSetting(VoiceDetection::Likelihood likelihood) { | |
26 switch (likelihood) { | |
27 case VoiceDetection::kVeryLowLikelihood: | |
28 return 3; | |
29 case VoiceDetection::kLowLikelihood: | |
30 return 2; | |
31 case VoiceDetection::kModerateLikelihood: | |
32 return 1; | |
33 case VoiceDetection::kHighLikelihood: | |
34 return 0; | |
35 } | |
36 assert(false); | |
37 return -1; | |
38 } | |
39 } // namespace | |
40 | |
41 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, | |
42 rtc::CriticalSection* crit) | |
43 : ProcessingComponent(), | |
44 apm_(apm), | |
45 crit_(crit), | |
46 stream_has_voice_(false), | |
47 using_external_vad_(false), | |
48 likelihood_(kLowLikelihood), | |
49 frame_size_ms_(10), | |
50 frame_size_samples_(0) { | |
51 RTC_DCHECK(apm); | |
52 RTC_DCHECK(crit); | 36 RTC_DCHECK(crit); |
53 } | 37 } |
54 | 38 |
55 VoiceDetectionImpl::~VoiceDetectionImpl() {} | 39 VoiceDetectionImpl::~VoiceDetectionImpl() {} |
56 | 40 |
57 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { | 41 void VoiceDetectionImpl::Initialize(int sample_rate_hz) { |
58 rtc::CritScope cs(crit_); | 42 rtc::CritScope cs(crit_); |
59 if (!is_component_enabled()) { | 43 sample_rate_hz_ = sample_rate_hz; |
60 return apm_->kNoError; | 44 rtc::scoped_ptr<Vad> new_vad; |
| 45 if (enabled_) { |
| 46 new_vad.reset(new Vad()); |
| 47 } |
| 48 vad_.swap(new_vad); |
| 49 using_external_vad_ = false; |
| 50 frame_size_samples_ = |
| 51 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; |
| 52 set_likelihood(likelihood_); |
| 53 } |
| 54 |
| 55 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { |
| 56 rtc::CritScope cs(crit_); |
| 57 if (!enabled_) { |
| 58 return; |
| 59 } |
| 60 if (using_external_vad_) { |
| 61 using_external_vad_ = false; |
| 62 return; |
61 } | 63 } |
62 | 64 |
63 if (using_external_vad_) { | 65 RTC_DCHECK_GE(160u, audio->num_frames_per_band()); |
64 using_external_vad_ = false; | |
65 return apm_->kNoError; | |
66 } | |
67 assert(audio->num_frames_per_band() <= 160); | |
68 | |
69 // TODO(ajm): concatenate data in frame buffer here. | 66 // TODO(ajm): concatenate data in frame buffer here. |
70 | 67 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, |
71 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), | |
72 apm_->proc_split_sample_rate_hz(), | |
73 audio->mixed_low_pass_data(), | 68 audio->mixed_low_pass_data(), |
74 frame_size_samples_); | 69 frame_size_samples_); |
75 if (vad_ret == 0) { | 70 if (vad_ret == 0) { |
76 stream_has_voice_ = false; | 71 stream_has_voice_ = false; |
77 audio->set_activity(AudioFrame::kVadPassive); | 72 audio->set_activity(AudioFrame::kVadPassive); |
78 } else if (vad_ret == 1) { | 73 } else if (vad_ret == 1) { |
79 stream_has_voice_ = true; | 74 stream_has_voice_ = true; |
80 audio->set_activity(AudioFrame::kVadActive); | 75 audio->set_activity(AudioFrame::kVadActive); |
81 } else { | 76 } else { |
82 return apm_->kUnspecifiedError; | 77 RTC_NOTREACHED(); |
83 } | 78 } |
84 | |
85 return apm_->kNoError; | |
86 } | 79 } |
87 | 80 |
88 int VoiceDetectionImpl::Enable(bool enable) { | 81 int VoiceDetectionImpl::Enable(bool enable) { |
89 rtc::CritScope cs(crit_); | 82 rtc::CritScope cs(crit_); |
90 return EnableComponent(enable); | 83 if (enabled_ != enable) { |
| 84 enabled_ = enable; |
| 85 Initialize(sample_rate_hz_); |
| 86 } |
| 87 return AudioProcessing::kNoError; |
91 } | 88 } |
92 | 89 |
93 bool VoiceDetectionImpl::is_enabled() const { | 90 bool VoiceDetectionImpl::is_enabled() const { |
94 rtc::CritScope cs(crit_); | 91 rtc::CritScope cs(crit_); |
95 return is_component_enabled(); | 92 return enabled_; |
96 } | 93 } |
97 | 94 |
98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { | 95 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { |
99 rtc::CritScope cs(crit_); | 96 rtc::CritScope cs(crit_); |
100 using_external_vad_ = true; | 97 using_external_vad_ = true; |
101 stream_has_voice_ = has_voice; | 98 stream_has_voice_ = has_voice; |
102 return apm_->kNoError; | 99 return AudioProcessing::kNoError; |
103 } | 100 } |
104 | 101 |
105 bool VoiceDetectionImpl::stream_has_voice() const { | 102 bool VoiceDetectionImpl::stream_has_voice() const { |
106 rtc::CritScope cs(crit_); | 103 rtc::CritScope cs(crit_); |
107 // TODO(ajm): enable this assertion? | 104 // TODO(ajm): enable this assertion? |
108 //assert(using_external_vad_ || is_component_enabled()); | 105 //assert(using_external_vad_ || is_component_enabled()); |
109 return stream_has_voice_; | 106 return stream_has_voice_; |
110 } | 107 } |
111 | 108 |
112 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { | 109 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { |
113 rtc::CritScope cs(crit_); | 110 rtc::CritScope cs(crit_); |
114 if (MapSetting(likelihood) == -1) { | 111 likelihood_ = likelihood; |
115 return apm_->kBadParameterError; | 112 if (enabled_) { |
| 113 int mode = 2; |
| 114 switch (likelihood) { |
| 115 case VoiceDetection::kVeryLowLikelihood: |
| 116 mode = 3; |
| 117 break; |
| 118 case VoiceDetection::kLowLikelihood: |
| 119 mode = 2; |
| 120 break; |
| 121 case VoiceDetection::kModerateLikelihood: |
| 122 mode = 1; |
| 123 break; |
| 124 case VoiceDetection::kHighLikelihood: |
| 125 mode = 0; |
| 126 break; |
| 127 default: |
| 128 RTC_NOTREACHED(); |
| 129 break; |
| 130 } |
| 131 int error = WebRtcVad_set_mode(vad_->state(), mode); |
| 132 RTC_DCHECK_EQ(0, error); |
116 } | 133 } |
117 | 134 return AudioProcessing::kNoError; |
118 likelihood_ = likelihood; | |
119 return Configure(); | |
120 } | 135 } |
121 | 136 |
122 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { | 137 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { |
123 rtc::CritScope cs(crit_); | 138 rtc::CritScope cs(crit_); |
124 return likelihood_; | 139 return likelihood_; |
125 } | 140 } |
126 | 141 |
127 int VoiceDetectionImpl::set_frame_size_ms(int size) { | 142 int VoiceDetectionImpl::set_frame_size_ms(int size) { |
128 rtc::CritScope cs(crit_); | 143 rtc::CritScope cs(crit_); |
129 assert(size == 10); // TODO(ajm): remove when supported. | 144 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. |
130 if (size != 10 && | |
131 size != 20 && | |
132 size != 30) { | |
133 return apm_->kBadParameterError; | |
134 } | |
135 | |
136 frame_size_ms_ = size; | 145 frame_size_ms_ = size; |
137 | 146 Initialize(sample_rate_hz_); |
138 return Initialize(); | 147 return AudioProcessing::kNoError; |
139 } | 148 } |
140 | 149 |
141 int VoiceDetectionImpl::frame_size_ms() const { | 150 int VoiceDetectionImpl::frame_size_ms() const { |
142 rtc::CritScope cs(crit_); | 151 rtc::CritScope cs(crit_); |
143 return frame_size_ms_; | 152 return frame_size_ms_; |
144 } | 153 } |
145 | |
146 int VoiceDetectionImpl::Initialize() { | |
147 int err = ProcessingComponent::Initialize(); | |
148 | |
149 rtc::CritScope cs(crit_); | |
150 if (err != apm_->kNoError || !is_component_enabled()) { | |
151 return err; | |
152 } | |
153 | |
154 using_external_vad_ = false; | |
155 frame_size_samples_ = static_cast<size_t>( | |
156 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); | |
157 // TODO(ajm): intialize frame buffer here. | |
158 | |
159 return apm_->kNoError; | |
160 } | |
161 | |
162 void* VoiceDetectionImpl::CreateHandle() const { | |
163 return WebRtcVad_Create(); | |
164 } | |
165 | |
166 void VoiceDetectionImpl::DestroyHandle(void* handle) const { | |
167 WebRtcVad_Free(static_cast<Handle*>(handle)); | |
168 } | |
169 | |
170 int VoiceDetectionImpl::InitializeHandle(void* handle) const { | |
171 return WebRtcVad_Init(static_cast<Handle*>(handle)); | |
172 } | |
173 | |
174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { | |
175 rtc::CritScope cs(crit_); | |
176 return WebRtcVad_set_mode(static_cast<Handle*>(handle), | |
177 MapSetting(likelihood_)); | |
178 } | |
179 | |
180 int VoiceDetectionImpl::num_handles_required() const { | |
181 return 1; | |
182 } | |
183 | |
184 int VoiceDetectionImpl::GetHandleError(void* handle) const { | |
185 // The VAD has no get_error() function. | |
186 assert(handle != NULL); | |
187 return apm_->kUnspecifiedError; | |
188 } | |
189 } // namespace webrtc | 154 } // namespace webrtc |
OLD | NEW |