Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Unified Diff: webrtc/modules/audio_processing/vad/voice_activity_detector.cc

Issue 1212543002: Pull the Voice Activity Detector out from the AGC (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/vad/voice_activity_detector.cc
diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b39ed66b897eb84bfdb4ce8a61281b554729a99d
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
+
+#include <algorithm>
+
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+namespace {
+
+const int kMaxLength = 320;
+const int kNumChannels = 1;
+
+const double kDefaultVoiceValue = 1.0;
+const double kNeutralProbability = 0.5;
+const double kLowProbability = 0.01;
+
+} // namespace
+
+VoiceActivityDetector::VoiceActivityDetector()
+ : last_voice_probability_(kDefaultVoiceValue),
+ // Initialize to the most common resampling situation.
+ resampler_(kMaxLength, kLength10Ms, kNumChannels),
+ standalone_vad_(StandaloneVad::Create()) {
+}
+
+// Because ISAC has a different chunk length, it updates
+// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
+// Otherwise it clears them.
+void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
+ int length,
+ int sample_rate_hz) {
+ DCHECK_EQ(length, sample_rate_hz / 100);
+ DCHECK_LE(length, kMaxLength);
+ // Resample to the required rate.
+ const int16_t* resampled_ptr = audio;
+ if (sample_rate_hz != kSampleRateHz) {
+ CHECK_EQ(
+ resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
+ 0);
+ resampler_.Push(audio, length, resampled_, kLength10Ms, length);
+ resampled_ptr = resampled_;
+ }
+ DCHECK_EQ(length, kLength10Ms);
+
+ // Each chunk needs to be passed into |standalone_vad_|, because internally it
+ // buffers the audio and processes it all at once when GetActivity() is
+ // called.
+ CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
aluebs-webrtc 2015/06/29 23:34:32 The bug that ASAN caught was that I was passing in
Andrew MacDonald 2015/06/29 23:38:23 Thanks goodness for ASAN.
aluebs-webrtc 2015/06/29 23:40:56 Certainly :)
+
+ audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
+
+ chunkwise_voice_probabilities_.resize(features_.num_frames);
+ chunkwise_rms_.resize(features_.num_frames);
+ std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
+ chunkwise_rms_.begin());
+ if (features_.num_frames > 0) {
+ if (features_.silence) {
+ // The other features are invalid, so set the voice probabilities to an
+ // arbitrary low value.
+ std::fill(chunkwise_voice_probabilities_.begin(),
+ chunkwise_voice_probabilities_.end(), kLowProbability);
+ } else {
+ std::fill(chunkwise_voice_probabilities_.begin(),
+ chunkwise_voice_probabilities_.end(), kNeutralProbability);
+ CHECK_GE(
+ standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+ chunkwise_voice_probabilities_.size()),
+ 0);
+ CHECK_GE(pitch_based_vad_.VoicingProbability(
+ features_, &chunkwise_voice_probabilities_[0]),
+ 0);
+ }
+ last_voice_probability_ = chunkwise_voice_probabilities_.back();
+ }
+}
+
+} // namespace webrtc

Powered by Google App Engine
This is Rietveld 408576698