| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" | 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" |
| 12 | 12 |
| 13 #include <assert.h> | |
| 14 | |
| 15 #include "webrtc/base/criticalsection.h" | |
| 16 #include "webrtc/base/thread_checker.h" | |
| 17 #include "webrtc/common_audio/vad/include/webrtc_vad.h" | 13 #include "webrtc/common_audio/vad/include/webrtc_vad.h" |
| 18 #include "webrtc/modules/audio_processing/audio_buffer.h" | 14 #include "webrtc/modules/audio_processing/audio_buffer.h" |
| 19 | 15 |
| 20 namespace webrtc { | 16 namespace webrtc { |
| 17 class VoiceDetectionImpl::Vad { |
| 18 public: |
| 19 Vad() { |
| 20 state_ = WebRtcVad_Create(); |
| 21 RTC_CHECK(state_); |
| 22 int error = WebRtcVad_Init(state_); |
| 23 RTC_DCHECK_EQ(0, error); |
| 24 } |
| 25 ~Vad() { |
| 26 WebRtcVad_Free(state_); |
| 27 } |
| 28 VadInst* state() { return state_; } |
| 29 private: |
| 30 VadInst* state_ = nullptr; |
| 31 RTC_DISALLOW_COPY_AND_ASSIGN(Vad); |
| 32 }; |
| 21 | 33 |
| 22 typedef VadInst Handle; | 34 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) |
| 23 | 35 : crit_(crit) { |
| 24 namespace { | |
| 25 int MapSetting(VoiceDetection::Likelihood likelihood) { | |
| 26 switch (likelihood) { | |
| 27 case VoiceDetection::kVeryLowLikelihood: | |
| 28 return 3; | |
| 29 case VoiceDetection::kLowLikelihood: | |
| 30 return 2; | |
| 31 case VoiceDetection::kModerateLikelihood: | |
| 32 return 1; | |
| 33 case VoiceDetection::kHighLikelihood: | |
| 34 return 0; | |
| 35 } | |
| 36 assert(false); | |
| 37 return -1; | |
| 38 } | |
| 39 } // namespace | |
| 40 | |
| 41 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, | |
| 42 rtc::CriticalSection* crit) | |
| 43 : ProcessingComponent(), | |
| 44 apm_(apm), | |
| 45 crit_(crit), | |
| 46 stream_has_voice_(false), | |
| 47 using_external_vad_(false), | |
| 48 likelihood_(kLowLikelihood), | |
| 49 frame_size_ms_(10), | |
| 50 frame_size_samples_(0) { | |
| 51 RTC_DCHECK(apm); | |
| 52 RTC_DCHECK(crit); | 36 RTC_DCHECK(crit); |
| 53 } | 37 } |
| 54 | 38 |
| 55 VoiceDetectionImpl::~VoiceDetectionImpl() {} | 39 VoiceDetectionImpl::~VoiceDetectionImpl() {} |
| 56 | 40 |
| 57 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { | 41 void VoiceDetectionImpl::Initialize(int sample_rate_hz) { |
| 58 rtc::CritScope cs(crit_); | 42 rtc::CritScope cs(crit_); |
| 59 if (!is_component_enabled()) { | 43 sample_rate_hz_ = sample_rate_hz; |
| 60 return apm_->kNoError; | 44 rtc::scoped_ptr<Vad> new_vad; |
| 45 if (enabled_) { |
| 46 new_vad.reset(new Vad()); |
| 47 } |
| 48 vad_.swap(new_vad); |
| 49 using_external_vad_ = false; |
| 50 frame_size_samples_ = |
| 51 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; |
| 52 set_likelihood(likelihood_); |
| 53 } |
| 54 |
| 55 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { |
| 56 rtc::CritScope cs(crit_); |
| 57 if (!enabled_) { |
| 58 return; |
| 59 } |
| 60 if (using_external_vad_) { |
| 61 using_external_vad_ = false; |
| 62 return; |
| 61 } | 63 } |
| 62 | 64 |
| 63 if (using_external_vad_) { | 65 RTC_DCHECK_GE(160u, audio->num_frames_per_band()); |
| 64 using_external_vad_ = false; | |
| 65 return apm_->kNoError; | |
| 66 } | |
| 67 assert(audio->num_frames_per_band() <= 160); | |
| 68 | |
| 69 // TODO(ajm): concatenate data in frame buffer here. | 66 // TODO(ajm): concatenate data in frame buffer here. |
| 70 | 67 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, |
| 71 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), | |
| 72 apm_->proc_split_sample_rate_hz(), | |
| 73 audio->mixed_low_pass_data(), | 68 audio->mixed_low_pass_data(), |
| 74 frame_size_samples_); | 69 frame_size_samples_); |
| 75 if (vad_ret == 0) { | 70 if (vad_ret == 0) { |
| 76 stream_has_voice_ = false; | 71 stream_has_voice_ = false; |
| 77 audio->set_activity(AudioFrame::kVadPassive); | 72 audio->set_activity(AudioFrame::kVadPassive); |
| 78 } else if (vad_ret == 1) { | 73 } else if (vad_ret == 1) { |
| 79 stream_has_voice_ = true; | 74 stream_has_voice_ = true; |
| 80 audio->set_activity(AudioFrame::kVadActive); | 75 audio->set_activity(AudioFrame::kVadActive); |
| 81 } else { | 76 } else { |
| 82 return apm_->kUnspecifiedError; | 77 RTC_NOTREACHED(); |
| 83 } | 78 } |
| 84 | |
| 85 return apm_->kNoError; | |
| 86 } | 79 } |
| 87 | 80 |
| 88 int VoiceDetectionImpl::Enable(bool enable) { | 81 int VoiceDetectionImpl::Enable(bool enable) { |
| 89 rtc::CritScope cs(crit_); | 82 rtc::CritScope cs(crit_); |
| 90 return EnableComponent(enable); | 83 if (enabled_ != enable) { |
| 84 enabled_ = enable; |
| 85 Initialize(sample_rate_hz_); |
| 86 } |
| 87 return AudioProcessing::kNoError; |
| 91 } | 88 } |
| 92 | 89 |
| 93 bool VoiceDetectionImpl::is_enabled() const { | 90 bool VoiceDetectionImpl::is_enabled() const { |
| 94 rtc::CritScope cs(crit_); | 91 rtc::CritScope cs(crit_); |
| 95 return is_component_enabled(); | 92 return enabled_; |
| 96 } | 93 } |
| 97 | 94 |
| 98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { | 95 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { |
| 99 rtc::CritScope cs(crit_); | 96 rtc::CritScope cs(crit_); |
| 100 using_external_vad_ = true; | 97 using_external_vad_ = true; |
| 101 stream_has_voice_ = has_voice; | 98 stream_has_voice_ = has_voice; |
| 102 return apm_->kNoError; | 99 return AudioProcessing::kNoError; |
| 103 } | 100 } |
| 104 | 101 |
| 105 bool VoiceDetectionImpl::stream_has_voice() const { | 102 bool VoiceDetectionImpl::stream_has_voice() const { |
| 106 rtc::CritScope cs(crit_); | 103 rtc::CritScope cs(crit_); |
| 107 // TODO(ajm): enable this assertion? | 104 // TODO(ajm): enable this assertion? |
| 108 //assert(using_external_vad_ || is_component_enabled()); | 105 //assert(using_external_vad_ || is_component_enabled()); |
| 109 return stream_has_voice_; | 106 return stream_has_voice_; |
| 110 } | 107 } |
| 111 | 108 |
| 112 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { | 109 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { |
| 113 rtc::CritScope cs(crit_); | 110 rtc::CritScope cs(crit_); |
| 114 if (MapSetting(likelihood) == -1) { | 111 likelihood_ = likelihood; |
| 115 return apm_->kBadParameterError; | 112 if (enabled_) { |
| 113 int mode = 2; |
| 114 switch (likelihood) { |
| 115 case VoiceDetection::kVeryLowLikelihood: |
| 116 mode = 3; |
| 117 break; |
| 118 case VoiceDetection::kLowLikelihood: |
| 119 mode = 2; |
| 120 break; |
| 121 case VoiceDetection::kModerateLikelihood: |
| 122 mode = 1; |
| 123 break; |
| 124 case VoiceDetection::kHighLikelihood: |
| 125 mode = 0; |
| 126 break; |
| 127 default: |
| 128 RTC_NOTREACHED(); |
| 129 break; |
| 130 } |
| 131 int error = WebRtcVad_set_mode(vad_->state(), mode); |
| 132 RTC_DCHECK_EQ(0, error); |
| 116 } | 133 } |
| 117 | 134 return AudioProcessing::kNoError; |
| 118 likelihood_ = likelihood; | |
| 119 return Configure(); | |
| 120 } | 135 } |
| 121 | 136 |
| 122 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { | 137 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { |
| 123 rtc::CritScope cs(crit_); | 138 rtc::CritScope cs(crit_); |
| 124 return likelihood_; | 139 return likelihood_; |
| 125 } | 140 } |
| 126 | 141 |
| 127 int VoiceDetectionImpl::set_frame_size_ms(int size) { | 142 int VoiceDetectionImpl::set_frame_size_ms(int size) { |
| 128 rtc::CritScope cs(crit_); | 143 rtc::CritScope cs(crit_); |
| 129 assert(size == 10); // TODO(ajm): remove when supported. | 144 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. |
| 130 if (size != 10 && | |
| 131 size != 20 && | |
| 132 size != 30) { | |
| 133 return apm_->kBadParameterError; | |
| 134 } | |
| 135 | |
| 136 frame_size_ms_ = size; | 145 frame_size_ms_ = size; |
| 137 | 146 Initialize(sample_rate_hz_); |
| 138 return Initialize(); | 147 return AudioProcessing::kNoError; |
| 139 } | 148 } |
| 140 | 149 |
| 141 int VoiceDetectionImpl::frame_size_ms() const { | 150 int VoiceDetectionImpl::frame_size_ms() const { |
| 142 rtc::CritScope cs(crit_); | 151 rtc::CritScope cs(crit_); |
| 143 return frame_size_ms_; | 152 return frame_size_ms_; |
| 144 } | 153 } |
| 145 | |
| 146 int VoiceDetectionImpl::Initialize() { | |
| 147 int err = ProcessingComponent::Initialize(); | |
| 148 | |
| 149 rtc::CritScope cs(crit_); | |
| 150 if (err != apm_->kNoError || !is_component_enabled()) { | |
| 151 return err; | |
| 152 } | |
| 153 | |
| 154 using_external_vad_ = false; | |
| 155 frame_size_samples_ = static_cast<size_t>( | |
| 156 frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); | |
| 157 // TODO(ajm): initialize frame buffer here. | |
| 158 | |
| 159 return apm_->kNoError; | |
| 160 } | |
| 161 | |
| 162 void* VoiceDetectionImpl::CreateHandle() const { | |
| 163 return WebRtcVad_Create(); | |
| 164 } | |
| 165 | |
| 166 void VoiceDetectionImpl::DestroyHandle(void* handle) const { | |
| 167 WebRtcVad_Free(static_cast<Handle*>(handle)); | |
| 168 } | |
| 169 | |
| 170 int VoiceDetectionImpl::InitializeHandle(void* handle) const { | |
| 171 return WebRtcVad_Init(static_cast<Handle*>(handle)); | |
| 172 } | |
| 173 | |
| 174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const { | |
| 175 rtc::CritScope cs(crit_); | |
| 176 return WebRtcVad_set_mode(static_cast<Handle*>(handle), | |
| 177 MapSetting(likelihood_)); | |
| 178 } | |
| 179 | |
| 180 int VoiceDetectionImpl::num_handles_required() const { | |
| 181 return 1; | |
| 182 } | |
| 183 | |
| 184 int VoiceDetectionImpl::GetHandleError(void* handle) const { | |
| 185 // The VAD has no get_error() function. | |
| 186 assert(handle != NULL); | |
| 187 return apm_->kUnspecifiedError; | |
| 188 } | |
| 189 } // namespace webrtc | 154 } // namespace webrtc |
| OLD | NEW |