webrtc/modules/audio_processing/vad/voice_activity_detector.cc - Issue 1181933002: Pull the Voice Activity Detector out from the AGC

Unified Diff: webrtc/modules/audio_processing/vad/voice_activity_detector.cc

Issue 1181933002: Pull the Voice Activity Detector out from the AGC (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Convert some parts to float Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/vad/voice_activity_detector.h ('K') | « webrtc/modules/audio_processing/vad/voice_activity_detector.h ('k') | webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc » ('j') | webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/vad/voice_activity_detector.cc

diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc

new file mode 100644

index 0000000000000000000000000000000000000000..fdce34f1e81bc09a6e0bce8edca6f548a0db7f10

--- /dev/null

+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc

@@ -0,0 +1,83 @@

+/*

+ *

+ * Use of this source code is governed by a BSD-style license

+ * that can be found in the LICENSE file in the root of the source

+ * tree. An additional intellectual property rights grant can be found

+ * in the file PATENTS. All contributing project authors may

+ * be found in the AUTHORS file in the root of the source tree.

+ */

+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

+#include "webrtc/base/checks.h"

+namespace webrtc {

+// File-local constants used by VoiceActivityDetector.
+namespace {

+// Upper bound on the |length| accepted by ProcessChunk() (enforced there with
+// DCHECK_LE); also the first argument used to construct |resampler_|.
+const int kMaxLength = 320;

+// Channel count handed to |resampler_|, both at construction and in
+// ResetIfNeeded().
+const int kNumChannels = 1;

+// Value |last_voice_probability_| is initialized with in the constructor.
+const double kDefaultVoiceValue = 1.0;

+// Value every per-frame probability is filled with before the standalone and
+// pitch-based VADs are run on the buffer.
+const double kNeutralProbability = 0.5;

+// Probability assigned to every frame when |features_.silence| is set.
+const double kLowProbability = 0.01;

+} // namespace

+// Constructs the detector: seeds |last_voice_probability_| with
+// kDefaultVoiceValue, builds |resampler_| and creates |standalone_vad_| through
+// StandaloneVad::Create().
+// NOTE(review): |resampler_| is constructed from kMaxLength and kLength10Ms,
+// which ProcessChunk() treats as sample counts, whereas ResetIfNeeded() is
+// later given sample rates in Hz - confirm the units the Resampler constructor
+// expects.
+VoiceActivityDetector::VoiceActivityDetector()

+ : last_voice_probability_(kDefaultVoiceValue),

+ // Initialize to the most common resampling situation.

+ resampler_(kMaxLength, kLength10Ms, kNumChannels),

+ standalone_vad_(StandaloneVad::Create()) {

+// Because ISAC has a different chunk length, it updates

+// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.

+// Otherwise it clears them.

Andrew MacDonald 2015/06/16 22:18:32 Else -> Otherwise

aluebs-webrtc 2015/06/17 01:44:31 Done.

+// Runs voice-activity detection on one chunk of 16-bit audio.
+//   audio          - input samples; must hold |length| samples.
+//   length         - sample count; must equal sample_rate_hz / 100 and must
+//                    not exceed kMaxLength.
+//   sample_rate_hz - rate of |audio|; any rate other than kSampleRateHz is
+//                    resampled before analysis.
+// On return |chunkwise_voice_probabilities_| and |chunkwise_rms_| hold one
+// entry per extracted frame (both are empty when no frame was produced), and
+// |last_voice_probability_| is refreshed only when at least one frame exists.
+void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
+                                         int length,
+                                         int sample_rate_hz) {
+  DCHECK_EQ(length, sample_rate_hz / 100);
+  DCHECK_LE(length, kMaxLength);
+
+  // Resample to the required rate.
+  const int16_t* resampled_ptr = audio;
+  if (sample_rate_hz != kSampleRateHz) {
+    CHECK_EQ(
+        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
+        0);
+    // NOTE(review): |length| is also passed as the final argument; the DCHECK
+    // below can only hold if Push() overwrites it with the resampled sample
+    // count - confirm against the Resampler interface.
+    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
+    resampled_ptr = resampled_;
+  }
+  DCHECK_EQ(length, kLength10Ms);
+
+  // Feed the standalone VAD the same (possibly resampled) buffer that feature
+  // extraction receives. Passing |audio| here would pair the original-rate
+  // samples with the post-resampling |length|.
+  CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
+  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
+
+  // Start from empty results so that a chunk yielding no frames leaves both
+  // vectors cleared.
+  chunkwise_voice_probabilities_.clear();
+  chunkwise_rms_.clear();
+  if (features_.num_frames > 0) {
+    if (features_.silence) {
+      // The other features are invalid, so set the voice probabilities to an
+      // arbitrary low value.
+      chunkwise_voice_probabilities_.assign(features_.num_frames,
+                                            kLowProbability);
+    } else {
+      // Fill every frame with a neutral value, then hand the same buffer to
+      // the standalone VAD and to the pitch-based VAD in turn.
+      chunkwise_voice_probabilities_.assign(features_.num_frames,
+                                            kNeutralProbability);
+      CHECK_GE(
+          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+                                       chunkwise_voice_probabilities_.size()),
+          0);
+      CHECK_GE(pitch_based_vad_.VoicingProbability(
+                   features_, &chunkwise_voice_probabilities_[0]),
+               0);
+    }
+    last_voice_probability_ = chunkwise_voice_probabilities_.back();
+  }
+  for (int i = 0; i < features_.num_frames; ++i) {
+    chunkwise_rms_.push_back(features_.rms[i]);
+  }
+}

+} // namespace webrtc