| Index: webrtc/modules/audio_processing/vad/pitch_based_vad.cc
|
| diff --git a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc
|
| deleted file mode 100644
|
| index 91638d007edc0607a97469f196ba77975bd05e89..0000000000000000000000000000000000000000
|
| --- a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc
|
| +++ /dev/null
|
| @@ -1,124 +0,0 @@
|
| -/*
|
| - * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
| - *
|
| - * Use of this source code is governed by a BSD-style license
|
| - * that can be found in the LICENSE file in the root of the source
|
| - * tree. An additional intellectual property rights grant can be found
|
| - * in the file PATENTS. All contributing project authors may
|
| - * be found in the AUTHORS file in the root of the source tree.
|
| - */
|
| -
|
| -#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
|
| -
|
| -#include <assert.h>
|
| -#include <math.h>
|
| -#include <string.h>
|
| -
|
| -#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
|
| -#include "webrtc/modules/audio_processing/vad/common.h"
|
| -#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
|
| -#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
|
| -#include "webrtc/modules/interface/module_common_types.h"
|
| -
|
| -namespace webrtc {
|
| -
|
| -static_assert(kNoiseGmmDim == kVoiceGmmDim,
|
| - "noise and voice gmm dimension not equal");
|
| -
|
| -// These values should match MATLAB counterparts for unit-tests to pass.
|
| -static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
|
| -static const double kInitialPriorProbability = 0.3;
|
| -static const int kTransientWidthThreshold = 7;
|
| -static const double kLowProbabilityThreshold = 0.2;
|
| -
|
| -static double LimitProbability(double p) {
|
| - const double kLimHigh = 0.99;
|
| - const double kLimLow = 0.01;
|
| -
|
| - if (p > kLimHigh)
|
| - p = kLimHigh;
|
| - else if (p < kLimLow)
|
| - p = kLimLow;
|
| - return p;
|
| -}
|
| -
|
| -PitchBasedVad::PitchBasedVad()
|
| - : p_prior_(kInitialPriorProbability),
|
| - circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
|
| - // Setup noise GMM.
|
| - noise_gmm_.dimension = kNoiseGmmDim;
|
| - noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
|
| - noise_gmm_.weight = kNoiseGmmWeights;
|
| - noise_gmm_.mean = &kNoiseGmmMean[0][0];
|
| - noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
|
| -
|
| - // Setup voice GMM.
|
| - voice_gmm_.dimension = kVoiceGmmDim;
|
| - voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
|
| - voice_gmm_.weight = kVoiceGmmWeights;
|
| - voice_gmm_.mean = &kVoiceGmmMean[0][0];
|
| - voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
|
| -}
|
| -
|
| -PitchBasedVad::~PitchBasedVad() {
|
| -}
|
| -
|
| -int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
|
| - double* p_combined) {
|
| - double p;
|
| - double gmm_features[3];
|
| - double pdf_features_given_voice;
|
| - double pdf_features_given_noise;
|
| - // These limits are the same in matlab implementation 'VoicingProbGMM().'
|
| - const double kLimLowLogPitchGain = -2.0;
|
| - const double kLimHighLogPitchGain = -0.9;
|
| - const double kLimLowSpectralPeak = 200;
|
| - const double kLimHighSpectralPeak = 2000;
|
| - const double kEps = 1e-12;
|
| - for (int n = 0; n < features.num_frames; n++) {
|
| - gmm_features[0] = features.log_pitch_gain[n];
|
| - gmm_features[1] = features.spectral_peak[n];
|
| - gmm_features[2] = features.pitch_lag_hz[n];
|
| -
|
| - pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
|
| - pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
|
| -
|
| - if (features.spectral_peak[n] < kLimLowSpectralPeak ||
|
| - features.spectral_peak[n] > kLimHighSpectralPeak ||
|
| - features.log_pitch_gain[n] < kLimLowLogPitchGain) {
|
| - pdf_features_given_voice = kEps * pdf_features_given_noise;
|
| - } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
|
| - pdf_features_given_noise = kEps * pdf_features_given_voice;
|
| - }
|
| -
|
| - p = p_prior_ * pdf_features_given_voice /
|
| - (pdf_features_given_voice * p_prior_ +
|
| - pdf_features_given_noise * (1 - p_prior_));
|
| -
|
| - p = LimitProbability(p);
|
| -
|
| - // Combine pitch-based probability with standalone probability, before
|
| - // updating prior probabilities.
|
| - double prod_active = p * p_combined[n];
|
| - double prod_inactive = (1 - p) * (1 - p_combined[n]);
|
| - p_combined[n] = prod_active / (prod_active + prod_inactive);
|
| -
|
| - if (UpdatePrior(p_combined[n]) < 0)
|
| - return -1;
|
| - // Limit prior probability. With a zero prior probability the posterior
|
| - // probability is always zero.
|
| - p_prior_ = LimitProbability(p_prior_);
|
| - }
|
| - return 0;
|
| -}
|
| -
|
| -int PitchBasedVad::UpdatePrior(double p) {
|
| - circular_buffer_->Insert(p);
|
| - if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
|
| - kLowProbabilityThreshold) < 0)
|
| - return -1;
|
| - p_prior_ = circular_buffer_->Mean();
|
| - return 0;
|
| -}
|
| -
|
| -} // namespace webrtc
|
|
|