webrtc/modules/audio_processing/vad/voice_activity_detector.cc - Issue 1181933002: Pull the Voice Activity Detector out from the AGC

Unified Diff: webrtc/modules/audio_processing/vad/voice_activity_detector.cc

Issue 1181933002: Pull the Voice Activity Detector out from the AGC (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Remove unused files from isolate Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/vad/voice_activity_detector.h ('k') | webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/vad/voice_activity_detector.cc

diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc

new file mode 100644

index 0000000000000000000000000000000000000000..79928d1aa0385793687d18ac5273fd4a06b9a713

--- /dev/null

+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc

@@ -0,0 +1,87 @@

+/*

+ *

+ * Use of this source code is governed by a BSD-style license

+ * that can be found in the LICENSE file in the root of the source

+ * tree. An additional intellectual property rights grant can be found

+ * in the file PATENTS. All contributing project authors may

+ * be found in the AUTHORS file in the root of the source tree.

+ */

+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

+#include <algorithm>

+#include "webrtc/base/checks.h"

+namespace webrtc {

+namespace {

+const int kMaxLength = 320;

+const int kNumChannels = 1;

+const double kDefaultVoiceValue = 1.0;

+const double kNeutralProbability = 0.5;

+const double kLowProbability = 0.01;

+} // namespace

+VoiceActivityDetector::VoiceActivityDetector()

+ : last_voice_probability_(kDefaultVoiceValue),

+ // Initialize to the most common resampling situation.

+ resampler_(kMaxLength, kLength10Ms, kNumChannels),

+ standalone_vad_(StandaloneVad::Create()) {

+// Because ISAC has a different chunk length, it updates

+// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.

+// Otherwise it clears them.

+void VoiceActivityDetector::ProcessChunk(const int16_t* audio,

+ int length,

+ int sample_rate_hz) {

+ DCHECK_EQ(length, sample_rate_hz / 100);

+ DCHECK_LE(length, kMaxLength);

+ // Resample to the required rate.

+ const int16_t* resampled_ptr = audio;

+ if (sample_rate_hz != kSampleRateHz) {

+ CHECK_EQ(

+ resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),

+ 0);

+ resampler_.Push(audio, length, resampled_, kLength10Ms, length);

+ resampled_ptr = resampled_;

+ }

+ DCHECK_EQ(length, kLength10Ms);

+ // Each chunk needs to be passed into |standalone_vad_|, because internally it

+ // buffers the audio and processes it all at once when GetActivity() is

+ // called.

+ CHECK_EQ(standalone_vad_->AddAudio(audio, length), 0);

+ audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);

+ chunkwise_voice_probabilities_.resize(features_.num_frames);

+ chunkwise_rms_.resize(features_.num_frames);

+ std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),

+ chunkwise_rms_.begin());

+ if (features_.num_frames > 0) {

+ if (features_.silence) {

+ // The other features are invalid, so set the voice probabilities to an

+ // arbitrary low value.

+ std::fill(chunkwise_voice_probabilities_.begin(),

+ chunkwise_voice_probabilities_.end(), kLowProbability);

+ } else {

+ std::fill(chunkwise_voice_probabilities_.begin(),

+ chunkwise_voice_probabilities_.end(), kNeutralProbability);

+ CHECK_GE(

+ standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],

+ chunkwise_voice_probabilities_.size()),

+ 0);

+ CHECK_GE(pitch_based_vad_.VoicingProbability(

+ features_, &chunkwise_voice_probabilities_[0]),

+ 0);

+ }

+ last_voice_probability_ = chunkwise_voice_probabilities_.back();

+ }

+} // namespace webrtc