Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(94)

Side by Side Diff: webrtc/modules/audio_processing/voice_detection_impl.cc

Issue 1494593004: Make VoiceDetection not a ProcessingComponent. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: rebase+comment Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/voice_detection_impl.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
12 12
13 #include <assert.h>
14
15 #include "webrtc/base/criticalsection.h"
16 #include "webrtc/base/thread_checker.h"
17 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 13 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
18 #include "webrtc/modules/audio_processing/audio_buffer.h" 14 #include "webrtc/modules/audio_processing/audio_buffer.h"
19 15
20 namespace webrtc { 16 namespace webrtc {
17 class VoiceDetectionImpl::Vad {
18 public:
19 Vad() {
20 state_ = WebRtcVad_Create();
21 RTC_CHECK(state_);
22 int error = WebRtcVad_Init(state_);
23 RTC_DCHECK_EQ(0, error);
24 }
25 ~Vad() {
26 WebRtcVad_Free(state_);
27 }
28 VadInst* state() { return state_; }
29 private:
30 VadInst* state_ = nullptr;
31 RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
32 };
21 33
22 typedef VadInst Handle; 34 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
23 35 : crit_(crit) {
24 namespace {
25 int MapSetting(VoiceDetection::Likelihood likelihood) {
26 switch (likelihood) {
27 case VoiceDetection::kVeryLowLikelihood:
28 return 3;
29 case VoiceDetection::kLowLikelihood:
30 return 2;
31 case VoiceDetection::kModerateLikelihood:
32 return 1;
33 case VoiceDetection::kHighLikelihood:
34 return 0;
35 }
36 assert(false);
37 return -1;
38 }
39 } // namespace
40
41 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
42 rtc::CriticalSection* crit)
43 : ProcessingComponent(),
44 apm_(apm),
45 crit_(crit),
46 stream_has_voice_(false),
47 using_external_vad_(false),
48 likelihood_(kLowLikelihood),
49 frame_size_ms_(10),
50 frame_size_samples_(0) {
51 RTC_DCHECK(apm);
52 RTC_DCHECK(crit); 36 RTC_DCHECK(crit);
53 } 37 }
54 38
55 VoiceDetectionImpl::~VoiceDetectionImpl() {} 39 VoiceDetectionImpl::~VoiceDetectionImpl() {}
56 40
57 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 41 void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
58 rtc::CritScope cs(crit_); 42 rtc::CritScope cs(crit_);
59 if (!is_component_enabled()) { 43 sample_rate_hz_ = sample_rate_hz;
60 return apm_->kNoError; 44 rtc::scoped_ptr<Vad> new_vad;
45 if (enabled_) {
46 new_vad.reset(new Vad());
47 }
48 vad_.swap(new_vad);
49 using_external_vad_ = false;
50 frame_size_samples_ =
51 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
52 set_likelihood(likelihood_);
53 }
54
55 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
56 rtc::CritScope cs(crit_);
57 if (!enabled_) {
58 return;
59 }
60 if (using_external_vad_) {
61 using_external_vad_ = false;
62 return;
61 } 63 }
62 64
63 if (using_external_vad_) { 65 RTC_DCHECK_GE(160u, audio->num_frames_per_band());
64 using_external_vad_ = false;
65 return apm_->kNoError;
66 }
67 assert(audio->num_frames_per_band() <= 160);
68
69 // TODO(ajm): concatenate data in frame buffer here. 66 // TODO(ajm): concatenate data in frame buffer here.
70 67 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
71 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
72 apm_->proc_split_sample_rate_hz(),
73 audio->mixed_low_pass_data(), 68 audio->mixed_low_pass_data(),
74 frame_size_samples_); 69 frame_size_samples_);
75 if (vad_ret == 0) { 70 if (vad_ret == 0) {
76 stream_has_voice_ = false; 71 stream_has_voice_ = false;
77 audio->set_activity(AudioFrame::kVadPassive); 72 audio->set_activity(AudioFrame::kVadPassive);
78 } else if (vad_ret == 1) { 73 } else if (vad_ret == 1) {
79 stream_has_voice_ = true; 74 stream_has_voice_ = true;
80 audio->set_activity(AudioFrame::kVadActive); 75 audio->set_activity(AudioFrame::kVadActive);
81 } else { 76 } else {
82 return apm_->kUnspecifiedError; 77 RTC_NOTREACHED();
83 } 78 }
84
85 return apm_->kNoError;
86 } 79 }
87 80
88 int VoiceDetectionImpl::Enable(bool enable) { 81 int VoiceDetectionImpl::Enable(bool enable) {
89 rtc::CritScope cs(crit_); 82 rtc::CritScope cs(crit_);
90 return EnableComponent(enable); 83 if (enabled_ != enable) {
84 enabled_ = enable;
85 Initialize(sample_rate_hz_);
86 }
87 return AudioProcessing::kNoError;
91 } 88 }
92 89
93 bool VoiceDetectionImpl::is_enabled() const { 90 bool VoiceDetectionImpl::is_enabled() const {
94 rtc::CritScope cs(crit_); 91 rtc::CritScope cs(crit_);
95 return is_component_enabled(); 92 return enabled_;
96 } 93 }
97 94
98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 95 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
99 rtc::CritScope cs(crit_); 96 rtc::CritScope cs(crit_);
100 using_external_vad_ = true; 97 using_external_vad_ = true;
101 stream_has_voice_ = has_voice; 98 stream_has_voice_ = has_voice;
102 return apm_->kNoError; 99 return AudioProcessing::kNoError;
103 } 100 }
104 101
105 bool VoiceDetectionImpl::stream_has_voice() const { 102 bool VoiceDetectionImpl::stream_has_voice() const {
106 rtc::CritScope cs(crit_); 103 rtc::CritScope cs(crit_);
107 // TODO(ajm): enable this assertion? 104 // TODO(ajm): enable this assertion?
108 //assert(using_external_vad_ || is_component_enabled()); 105 //assert(using_external_vad_ || is_component_enabled());
109 return stream_has_voice_; 106 return stream_has_voice_;
110 } 107 }
111 108
112 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 109 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
113 rtc::CritScope cs(crit_); 110 rtc::CritScope cs(crit_);
114 if (MapSetting(likelihood) == -1) { 111 likelihood_ = likelihood;
115 return apm_->kBadParameterError; 112 if (enabled_) {
113 int mode = 2;
114 switch (likelihood) {
115 case VoiceDetection::kVeryLowLikelihood:
116 mode = 3;
117 break;
118 case VoiceDetection::kLowLikelihood:
119 mode = 2;
120 break;
121 case VoiceDetection::kModerateLikelihood:
122 mode = 1;
123 break;
124 case VoiceDetection::kHighLikelihood:
125 mode = 0;
126 break;
127 default:
128 RTC_NOTREACHED();
129 break;
130 }
131 int error = WebRtcVad_set_mode(vad_->state(), mode);
132 RTC_DCHECK_EQ(0, error);
116 } 133 }
117 134 return AudioProcessing::kNoError;
118 likelihood_ = likelihood;
119 return Configure();
120 } 135 }
121 136
122 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 137 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
123 rtc::CritScope cs(crit_); 138 rtc::CritScope cs(crit_);
124 return likelihood_; 139 return likelihood_;
125 } 140 }
126 141
127 int VoiceDetectionImpl::set_frame_size_ms(int size) { 142 int VoiceDetectionImpl::set_frame_size_ms(int size) {
128 rtc::CritScope cs(crit_); 143 rtc::CritScope cs(crit_);
129 assert(size == 10); // TODO(ajm): remove when supported. 144 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
130 if (size != 10 &&
131 size != 20 &&
132 size != 30) {
133 return apm_->kBadParameterError;
134 }
135
136 frame_size_ms_ = size; 145 frame_size_ms_ = size;
137 146 Initialize(sample_rate_hz_);
138 return Initialize(); 147 return AudioProcessing::kNoError;
139 } 148 }
140 149
141 int VoiceDetectionImpl::frame_size_ms() const { 150 int VoiceDetectionImpl::frame_size_ms() const {
142 rtc::CritScope cs(crit_); 151 rtc::CritScope cs(crit_);
143 return frame_size_ms_; 152 return frame_size_ms_;
144 } 153 }
145
146 int VoiceDetectionImpl::Initialize() {
147 int err = ProcessingComponent::Initialize();
148
149 rtc::CritScope cs(crit_);
150 if (err != apm_->kNoError || !is_component_enabled()) {
151 return err;
152 }
153
154 using_external_vad_ = false;
155 frame_size_samples_ = static_cast<size_t>(
156 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
157 // TODO(ajm): initialize frame buffer here.
158
159 return apm_->kNoError;
160 }
161
162 void* VoiceDetectionImpl::CreateHandle() const {
163 return WebRtcVad_Create();
164 }
165
166 void VoiceDetectionImpl::DestroyHandle(void* handle) const {
167 WebRtcVad_Free(static_cast<Handle*>(handle));
168 }
169
170 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171 return WebRtcVad_Init(static_cast<Handle*>(handle));
172 }
173
174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
175 rtc::CritScope cs(crit_);
176 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
177 MapSetting(likelihood_));
178 }
179
180 int VoiceDetectionImpl::num_handles_required() const {
181 return 1;
182 }
183
184 int VoiceDetectionImpl::GetHandleError(void* handle) const {
185 // The VAD has no get_error() function.
186 assert(handle != NULL);
187 return apm_->kUnspecifiedError;
188 }
189 } // namespace webrtc 154 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/voice_detection_impl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698