Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Unified Diff: webrtc/modules/audio_processing/agc/pitch_based_vad.cc

Issue 1212543002: Pull the Voice Activity Detector out from the AGC (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/agc/pitch_based_vad.cc
diff --git a/webrtc/modules/audio_processing/agc/pitch_based_vad.cc b/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
deleted file mode 100644
index 0cfa52a0108afb0b255a4c10d519467dda0085c3..0000000000000000000000000000000000000000
--- a/webrtc/modules/audio_processing/agc/pitch_based_vad.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
-
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
-#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
-#include "webrtc/modules/audio_processing/agc/common.h"
-#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
-#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
-#include "webrtc/modules/interface/module_common_types.h"
-
-namespace webrtc {
-
-static_assert(kNoiseGmmDim == kVoiceGmmDim,  // Both models are evaluated on the same gmm_features array in VoicingProbability().
- "noise and voice gmm dimension not equal");
-
-// These values should match their MATLAB counterparts for the unit tests to pass.
-static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames; passed to AgcCircularBuffer::Create() by the constructor.
-static const double kInitialPriorProbability = 0.3;  // Value p_prior_ is initialized with in the constructor.
-static const int kTransientWidthThreshold = 7;  // First argument to RemoveTransient() in UpdatePrior(); NOTE(review): unit presumably frames - confirm.
-static const double kLowProbabilityThreshold = 0.2;  // Second argument to RemoveTransient() in UpdatePrior().
-
-static double LimitProbability(double p) {  // Clamps |p| to the range [kLimLow, kLimHigh] and returns the result.
- const double kLimHigh = 0.99;  // Upper clamp value.
- const double kLimLow = 0.01;  // Lower clamp value.
-
- if (p > kLimHigh)
- p = kLimHigh;
- else if (p < kLimLow)
- p = kLimLow;
- return p;  // A NaN input fails both comparisons above and is returned unchanged.
-}
-
-PitchBasedVad::PitchBasedVad()  // Sets the initial prior, creates the posterior history buffer and points both GMMs at their tables.
- : p_prior_(kInitialPriorProbability),
- circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {  // NOTE(review): the result of Create() is not checked here - confirm it cannot fail.
- // Setup noise GMM. The fields below are pointed at the kNoiseGmm* tables (presumably declared in noise_gmm_tables.h).
- noise_gmm_.dimension = kNoiseGmmDim;
- noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
- noise_gmm_.weight = kNoiseGmmWeights;
- noise_gmm_.mean = &kNoiseGmmMean[0][0];  // Address of the first element of the 2-D mean table.
- noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];  // Address of the first element of the 3-D inverse-covariance table.
-
- // Setup voice GMM. Same layout as above, using the kVoiceGmm* tables (presumably declared in voice_gmm_tables.h).
- voice_gmm_.dimension = kVoiceGmmDim;
- voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
- voice_gmm_.weight = kVoiceGmmWeights;
- voice_gmm_.mean = &kVoiceGmmMean[0][0];  // Address of the first element of the 2-D mean table.
- voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];  // Address of the first element of the 3-D inverse-covariance table.
-}
-
-PitchBasedVad::~PitchBasedVad() {}  // Empty body. NOTE(review): whether circular_buffer_ is released depends on its declared type in the header - confirm.
-
-int PitchBasedVad::VoicingProbability(const AudioFeatures& features,  // Per-frame inputs; entries 0..num_frames-1 of each feature array are read.
- double* p_combined) {  // In/out: entry n is read, then overwritten with the combined probability. Returns 0, or -1 if UpdatePrior() fails.
- double p;  // Pitch-based voice probability of the current frame.
- double gmm_features[3];  // {log_pitch_gain, spectral_peak, pitch_lag_hz} of the current frame.
- double pdf_features_given_voice;
- double pdf_features_given_noise;
- // These limits are the same as in the MATLAB implementation 'VoicingProbGMM()'.
- const double kLimLowLogPitchGain = -2.0;
- const double kLimHighLogPitchGain = -0.9;
- const double kLimLowSpectralPeak = 200;
- const double kLimHighSpectralPeak = 2000;
- const double kEps = 1e-12;  // Factor used below to make one likelihood negligible relative to the other.
- for (int n = 0; n < features.num_frames; n++) {
- gmm_features[0] = features.log_pitch_gain[n];
- gmm_features[1] = features.spectral_peak[n];
- gmm_features[2] = features.pitch_lag_hz[n];
-
- pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
- pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
-
- if (features.spectral_peak[n] < kLimLowSpectralPeak ||
- features.spectral_peak[n] > kLimHighSpectralPeak ||
- features.log_pitch_gain[n] < kLimLowLogPitchGain) {
- pdf_features_given_voice = kEps * pdf_features_given_noise;  // Out-of-range peak or low pitch gain: override the voice likelihood so noise dominates.
- } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
- pdf_features_given_noise = kEps * pdf_features_given_voice;  // High pitch gain: override the noise likelihood so voice dominates.
- }
-
- p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
- p_prior_ + pdf_features_given_noise * (1 - p_prior_));  // Bayes' rule with prior p_prior_. NOTE(review): 0/0 if both likelihoods are zero - confirm EvaluateGmm() cannot return 0 for both.
-
- p = LimitProbability(p);  // Keep p within [0.01, 0.99].
-
- // Combine pitch-based probability with standalone probability, before
- // updating prior probabilities.
- double prod_active = p * p_combined[n];  // Product of the two "voice" probabilities.
- double prod_inactive = (1 - p) * (1 - p_combined[n]);  // Product of the two complementary probabilities.
- p_combined[n] = prod_active / (prod_active + prod_inactive);  // Normalize by the sum of both products.
-
- if (UpdatePrior(p_combined[n]) < 0)  // Stores the combined value in the history and recomputes p_prior_.
- return -1;  // Frames after n are left unprocessed on this path.
- // Limit prior probability. With a zero prior probability the posterior
- // probability is always zero.
- p_prior_ = LimitProbability(p_prior_);
- }
- return 0;
-}
-
-int PitchBasedVad::UpdatePrior(double p) {  // Adds |p| to the history buffer and recomputes p_prior_; returns 0, or -1 if RemoveTransient() returns a negative value.
- circular_buffer_->Insert(p);
- if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
- kLowProbabilityThreshold) < 0)  // NOTE(review): presumably suppresses short bursts in the stored history - confirm in circular_buffer.h.
- return -1;  // p_prior_ is left unchanged on this path.
- p_prior_ = circular_buffer_->Mean();  // The new prior is the mean reported by the buffer.
- return 0;
-}
-
-} // namespace webrtc

Powered by Google App Engine
This is Rietveld 408576698