| Index: webrtc/modules/audio_processing/voice_detection_impl.cc
|
| diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc
|
| index 25c7269cb446aa149a3d7b4f4f2337f2cbb98b1c..22d218c371b8a54f430bf8bf4f251a3aaf52ef2c 100644
|
| --- a/webrtc/modules/audio_processing/voice_detection_impl.cc
|
| +++ b/webrtc/modules/audio_processing/voice_detection_impl.cc
|
| @@ -10,66 +10,61 @@
|
|
|
| #include "webrtc/modules/audio_processing/voice_detection_impl.h"
|
|
|
| -#include <assert.h>
|
| -
|
| -#include "webrtc/base/criticalsection.h"
|
| -#include "webrtc/base/thread_checker.h"
|
| #include "webrtc/common_audio/vad/include/webrtc_vad.h"
|
| #include "webrtc/modules/audio_processing/audio_buffer.h"
|
|
|
| namespace webrtc {
|
| -
|
| -typedef VadInst Handle;
|
| -
|
| -namespace {
|
| -int MapSetting(VoiceDetection::Likelihood likelihood) {
|
| - switch (likelihood) {
|
| - case VoiceDetection::kVeryLowLikelihood:
|
| - return 3;
|
| - case VoiceDetection::kLowLikelihood:
|
| - return 2;
|
| - case VoiceDetection::kModerateLikelihood:
|
| - return 1;
|
| - case VoiceDetection::kHighLikelihood:
|
| - return 0;
|
| +class VoiceDetectionImpl::Vad {
|
| + public:
|
| + Vad() {
|
| + state_ = WebRtcVad_Create();
|
| + RTC_CHECK(state_);
|
| + int error = WebRtcVad_Init(state_);
|
| + RTC_DCHECK_EQ(0, error);
|
| }
|
| - assert(false);
|
| - return -1;
|
| -}
|
| -} // namespace
|
| -
|
| -VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
|
| - rtc::CriticalSection* crit)
|
| - : ProcessingComponent(),
|
| - apm_(apm),
|
| - crit_(crit),
|
| - stream_has_voice_(false),
|
| - using_external_vad_(false),
|
| - likelihood_(kLowLikelihood),
|
| - frame_size_ms_(10),
|
| - frame_size_samples_(0) {
|
| - RTC_DCHECK(apm);
|
| + ~Vad() {
|
| + WebRtcVad_Free(state_);
|
| + }
|
| + VadInst* state() { return state_; }
|
| + private:
|
| + VadInst* state_ = nullptr;
|
| + RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
|
| +};
|
| +
|
| +VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
|
| + : crit_(crit) {
|
| RTC_DCHECK(crit);
|
| }
|
|
|
| VoiceDetectionImpl::~VoiceDetectionImpl() {}
|
|
|
| -int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
| +void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
|
| rtc::CritScope cs(crit_);
|
| - if (!is_component_enabled()) {
|
| - return apm_->kNoError;
|
| + sample_rate_hz_ = sample_rate_hz;
|
| + rtc::scoped_ptr<Vad> new_vad;
|
| + if (enabled_) {
|
| + new_vad.reset(new Vad());
|
| }
|
| + vad_.swap(new_vad);
|
| + using_external_vad_ = false;
|
| + frame_size_samples_ =
|
| + static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
|
| + set_likelihood(likelihood_);
|
| +}
|
|
|
| +void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
| + rtc::CritScope cs(crit_);
|
| + if (!enabled_) {
|
| + return;
|
| + }
|
| if (using_external_vad_) {
|
| using_external_vad_ = false;
|
| - return apm_->kNoError;
|
| + return;
|
| }
|
| - assert(audio->num_frames_per_band() <= 160);
|
|
|
| + RTC_DCHECK_GE(160u, audio->num_frames_per_band());
|
| // TODO(ajm): concatenate data in frame buffer here.
|
| -
|
| - int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
|
| - apm_->proc_split_sample_rate_hz(),
|
| + int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
|
| audio->mixed_low_pass_data(),
|
| frame_size_samples_);
|
| if (vad_ret == 0) {
|
| @@ -79,27 +74,29 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
|
| stream_has_voice_ = true;
|
| audio->set_activity(AudioFrame::kVadActive);
|
| } else {
|
| - return apm_->kUnspecifiedError;
|
| + RTC_NOTREACHED();
|
| }
|
| -
|
| - return apm_->kNoError;
|
| }
|
|
|
| int VoiceDetectionImpl::Enable(bool enable) {
|
| rtc::CritScope cs(crit_);
|
| - return EnableComponent(enable);
|
| + if (enabled_ != enable) {
|
| + enabled_ = enable;
|
| + Initialize(sample_rate_hz_);
|
| + }
|
| + return AudioProcessing::kNoError;
|
| }
|
|
|
| bool VoiceDetectionImpl::is_enabled() const {
|
| rtc::CritScope cs(crit_);
|
| - return is_component_enabled();
|
| + return enabled_;
|
| }
|
|
|
| int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
|
| rtc::CritScope cs(crit_);
|
| using_external_vad_ = true;
|
| stream_has_voice_ = has_voice;
|
| - return apm_->kNoError;
|
| + return AudioProcessing::kNoError;
|
| }
|
|
|
| bool VoiceDetectionImpl::stream_has_voice() const {
|
| @@ -111,12 +108,30 @@ bool VoiceDetectionImpl::stream_has_voice() const {
|
|
|
| int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
|
| rtc::CritScope cs(crit_);
|
| - if (MapSetting(likelihood) == -1) {
|
| - return apm_->kBadParameterError;
|
| - }
|
| -
|
| likelihood_ = likelihood;
|
| - return Configure();
|
| + if (enabled_) {
|
| + int mode = 2;
|
| + switch (likelihood) {
|
| + case VoiceDetection::kVeryLowLikelihood:
|
| + mode = 3;
|
| + break;
|
| + case VoiceDetection::kLowLikelihood:
|
| + mode = 2;
|
| + break;
|
| + case VoiceDetection::kModerateLikelihood:
|
| + mode = 1;
|
| + break;
|
| + case VoiceDetection::kHighLikelihood:
|
| + mode = 0;
|
| + break;
|
| + default:
|
| + RTC_NOTREACHED();
|
| + break;
|
| + }
|
| + int error = WebRtcVad_set_mode(vad_->state(), mode);
|
| + RTC_DCHECK_EQ(0, error);
|
| + }
|
| + return AudioProcessing::kNoError;
|
| }
|
|
|
| VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
|
| @@ -126,64 +141,14 @@ VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
|
|
|
| int VoiceDetectionImpl::set_frame_size_ms(int size) {
|
| rtc::CritScope cs(crit_);
|
| - assert(size == 10); // TODO(ajm): remove when supported.
|
| - if (size != 10 &&
|
| - size != 20 &&
|
| - size != 30) {
|
| - return apm_->kBadParameterError;
|
| - }
|
| -
|
| + RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
|
| frame_size_ms_ = size;
|
| -
|
| - return Initialize();
|
| + Initialize(sample_rate_hz_);
|
| + return AudioProcessing::kNoError;
|
| }
|
|
|
| int VoiceDetectionImpl::frame_size_ms() const {
|
| rtc::CritScope cs(crit_);
|
| return frame_size_ms_;
|
| }
|
| -
|
| -int VoiceDetectionImpl::Initialize() {
|
| - int err = ProcessingComponent::Initialize();
|
| -
|
| - rtc::CritScope cs(crit_);
|
| - if (err != apm_->kNoError || !is_component_enabled()) {
|
| - return err;
|
| - }
|
| -
|
| - using_external_vad_ = false;
|
| - frame_size_samples_ = static_cast<size_t>(
|
| - frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
|
| - // TODO(ajm): intialize frame buffer here.
|
| -
|
| - return apm_->kNoError;
|
| -}
|
| -
|
| -void* VoiceDetectionImpl::CreateHandle() const {
|
| - return WebRtcVad_Create();
|
| -}
|
| -
|
| -void VoiceDetectionImpl::DestroyHandle(void* handle) const {
|
| - WebRtcVad_Free(static_cast<Handle*>(handle));
|
| -}
|
| -
|
| -int VoiceDetectionImpl::InitializeHandle(void* handle) const {
|
| - return WebRtcVad_Init(static_cast<Handle*>(handle));
|
| -}
|
| -
|
| -int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
|
| - rtc::CritScope cs(crit_);
|
| - return WebRtcVad_set_mode(static_cast<Handle*>(handle),
|
| - MapSetting(likelihood_));
|
| -}
|
| -
|
| -int VoiceDetectionImpl::num_handles_required() const {
|
| - return 1;
|
| -}
|
| -
|
| -int VoiceDetectionImpl::GetHandleError(void* handle) const {
|
| - // The VAD has no get_error() function.
|
| - assert(handle != NULL);
|
| - return apm_->kUnspecifiedError;
|
| -}
|
| } // namespace webrtc
|
|
|