webrtc/modules/audio_processing/vad/pitch_based_vad.cc - Issue 1192863006: Revert "Pull the Voice Activity Detector out from the AGC"

Unified Diff: webrtc/modules/audio_processing/vad/pitch_based_vad.cc

Issue 1192863006: Revert "Pull the Voice Activity Detector out from the AGC" (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/vad/pitch_based_vad.h ('k') | webrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/vad/pitch_based_vad.cc

diff --git a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc

deleted file mode 100644

index 91638d007edc0607a97469f196ba77975bd05e89..0000000000000000000000000000000000000000

--- a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc

+++ /dev/null

@@ -1,124 +0,0 @@

-/*

- *

- * Use of this source code is governed by a BSD-style license

- * that can be found in the LICENSE file in the root of the source

- * tree. An additional intellectual property rights grant can be found

- * in the file PATENTS. All contributing project authors may

- * be found in the AUTHORS file in the root of the source tree.

- */

-#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"

-#include <assert.h>

-#include <math.h>

-#include <string.h>

-#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"

-#include "webrtc/modules/audio_processing/vad/common.h"

-#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"

-#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"

-#include "webrtc/modules/interface/module_common_types.h"

-namespace webrtc {

-static_assert(kNoiseGmmDim == kVoiceGmmDim,

- "noise and voice gmm dimension not equal");

-// These values should match MATLAB counterparts for unit-tests to pass.

-static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.

-static const double kInitialPriorProbability = 0.3;

-static const int kTransientWidthThreshold = 7;

-static const double kLowProbabilityThreshold = 0.2;

-static double LimitProbability(double p) {

- const double kLimHigh = 0.99;

- const double kLimLow = 0.01;

- if (p > kLimHigh)

- p = kLimHigh;

- else if (p < kLimLow)

- p = kLimLow;

- return p;

-PitchBasedVad::PitchBasedVad()

- : p_prior_(kInitialPriorProbability),

- circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {

- // Setup noise GMM.

- noise_gmm_.dimension = kNoiseGmmDim;

- noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;

- noise_gmm_.weight = kNoiseGmmWeights;

- noise_gmm_.mean = &kNoiseGmmMean[0][0];

- noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];

- // Setup voice GMM.

- voice_gmm_.dimension = kVoiceGmmDim;

- voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;

- voice_gmm_.weight = kVoiceGmmWeights;

- voice_gmm_.mean = &kVoiceGmmMean[0][0];

- voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];

-PitchBasedVad::~PitchBasedVad() {

-int PitchBasedVad::VoicingProbability(const AudioFeatures& features,

- double* p_combined) {

- double p;

- double gmm_features[3];

- double pdf_features_given_voice;

- double pdf_features_given_noise;

- // These limits are the same in matlab implementation 'VoicingProbGMM().'

- const double kLimLowLogPitchGain = -2.0;

- const double kLimHighLogPitchGain = -0.9;

- const double kLimLowSpectralPeak = 200;

- const double kLimHighSpectralPeak = 2000;

- const double kEps = 1e-12;

- for (int n = 0; n < features.num_frames; n++) {

- gmm_features[0] = features.log_pitch_gain[n];

- gmm_features[1] = features.spectral_peak[n];

- gmm_features[2] = features.pitch_lag_hz[n];

- pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);

- pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);

- if (features.spectral_peak[n] < kLimLowSpectralPeak ||

- features.spectral_peak[n] > kLimHighSpectralPeak ||

- features.log_pitch_gain[n] < kLimLowLogPitchGain) {

- pdf_features_given_voice = kEps * pdf_features_given_noise;

- } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {

- pdf_features_given_noise = kEps * pdf_features_given_voice;

- }

- p = p_prior_ * pdf_features_given_voice /

- (pdf_features_given_voice * p_prior_ +

- pdf_features_given_noise * (1 - p_prior_));

- p = LimitProbability(p);

- // Combine pitch-based probability with standalone probability, before

- // updating prior probabilities.

- double prod_active = p * p_combined[n];

- double prod_inactive = (1 - p) * (1 - p_combined[n]);

- p_combined[n] = prod_active / (prod_active + prod_inactive);

- if (UpdatePrior(p_combined[n]) < 0)

- return -1;

- // Limit prior probability. With a zero prior probability the posterior

- // probability is always zero.

- p_prior_ = LimitProbability(p_prior_);

- }

- return 0;

-int PitchBasedVad::UpdatePrior(double p) {

- circular_buffer_->Insert(p);

- if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,

- kLowProbabilityThreshold) < 0)

- return -1;

- p_prior_ = circular_buffer_->Mean();

- return 0;

-} // namespace webrtc