Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(299)

Side by Side Diff: webrtc/modules/audio_processing/agc/pitch_based_vad.cc

Issue 1192863006: Revert "Pull the Voice Activity Detector out from the AGC" (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" 11 #include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
12 12
13 #include <assert.h> 13 #include <assert.h>
14 #include <math.h> 14 #include <math.h>
15 #include <string.h> 15 #include <string.h>
16 16
17 #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h" 17 #include "webrtc/modules/audio_processing/agc/circular_buffer.h"
18 #include "webrtc/modules/audio_processing/vad/common.h" 18 #include "webrtc/modules/audio_processing/agc/common.h"
19 #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h" 19 #include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
20 #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h" 20 #include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
21 #include "webrtc/modules/interface/module_common_types.h" 21 #include "webrtc/modules/interface/module_common_types.h"
22 22
23 namespace webrtc { 23 namespace webrtc {
24 24
25 static_assert(kNoiseGmmDim == kVoiceGmmDim, 25 static_assert(kNoiseGmmDim == kVoiceGmmDim,
26 "noise and voice gmm dimension not equal"); 26 "noise and voice gmm dimension not equal");
27 27
28 // These values should match MATLAB counterparts for unit-tests to pass. 28 // These values should match MATLAB counterparts for unit-tests to pass.
29 static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames. 29 static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
30 static const double kInitialPriorProbability = 0.3; 30 static const double kInitialPriorProbability = 0.3;
31 static const int kTransientWidthThreshold = 7; 31 static const int kTransientWidthThreshold = 7;
32 static const double kLowProbabilityThreshold = 0.2; 32 static const double kLowProbabilityThreshold = 0.2;
33 33
34 static double LimitProbability(double p) { 34 static double LimitProbability(double p) {
35 const double kLimHigh = 0.99; 35 const double kLimHigh = 0.99;
36 const double kLimLow = 0.01; 36 const double kLimLow = 0.01;
37 37
38 if (p > kLimHigh) 38 if (p > kLimHigh)
39 p = kLimHigh; 39 p = kLimHigh;
40 else if (p < kLimLow) 40 else if (p < kLimLow)
41 p = kLimLow; 41 p = kLimLow;
42 return p; 42 return p;
43 } 43 }
44 44
45 PitchBasedVad::PitchBasedVad() 45 PitchBasedVad::PitchBasedVad()
46 : p_prior_(kInitialPriorProbability), 46 : p_prior_(kInitialPriorProbability),
47 circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { 47 circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
48 // Setup noise GMM. 48 // Setup noise GMM.
49 noise_gmm_.dimension = kNoiseGmmDim; 49 noise_gmm_.dimension = kNoiseGmmDim;
50 noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; 50 noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
51 noise_gmm_.weight = kNoiseGmmWeights; 51 noise_gmm_.weight = kNoiseGmmWeights;
52 noise_gmm_.mean = &kNoiseGmmMean[0][0]; 52 noise_gmm_.mean = &kNoiseGmmMean[0][0];
53 noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; 53 noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
54 54
55 // Setup voice GMM. 55 // Setup voice GMM.
56 voice_gmm_.dimension = kVoiceGmmDim; 56 voice_gmm_.dimension = kVoiceGmmDim;
57 voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; 57 voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
58 voice_gmm_.weight = kVoiceGmmWeights; 58 voice_gmm_.weight = kVoiceGmmWeights;
59 voice_gmm_.mean = &kVoiceGmmMean[0][0]; 59 voice_gmm_.mean = &kVoiceGmmMean[0][0];
60 voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; 60 voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
61 } 61 }
62 62
63 PitchBasedVad::~PitchBasedVad() { 63 PitchBasedVad::~PitchBasedVad() {}
64 }
65 64
66 int PitchBasedVad::VoicingProbability(const AudioFeatures& features, 65 int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
67 double* p_combined) { 66 double* p_combined) {
68 double p; 67 double p;
69 double gmm_features[3]; 68 double gmm_features[3];
70 double pdf_features_given_voice; 69 double pdf_features_given_voice;
71 double pdf_features_given_noise; 70 double pdf_features_given_noise;
72 // These limits are the same as in the MATLAB implementation 'VoicingProbGMM()'. 71 // These limits are the same as in the MATLAB implementation 'VoicingProbGMM()'.
73 const double kLimLowLogPitchGain = -2.0; 72 const double kLimLowLogPitchGain = -2.0;
74 const double kLimHighLogPitchGain = -0.9; 73 const double kLimHighLogPitchGain = -0.9;
75 const double kLimLowSpectralPeak = 200; 74 const double kLimLowSpectralPeak = 200;
76 const double kLimHighSpectralPeak = 2000; 75 const double kLimHighSpectralPeak = 2000;
77 const double kEps = 1e-12; 76 const double kEps = 1e-12;
78 for (int n = 0; n < features.num_frames; n++) { 77 for (int n = 0; n < features.num_frames; n++) {
79 gmm_features[0] = features.log_pitch_gain[n]; 78 gmm_features[0] = features.log_pitch_gain[n];
80 gmm_features[1] = features.spectral_peak[n]; 79 gmm_features[1] = features.spectral_peak[n];
81 gmm_features[2] = features.pitch_lag_hz[n]; 80 gmm_features[2] = features.pitch_lag_hz[n];
82 81
83 pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_); 82 pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
84 pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_); 83 pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
85 84
86 if (features.spectral_peak[n] < kLimLowSpectralPeak || 85 if (features.spectral_peak[n] < kLimLowSpectralPeak ||
87 features.spectral_peak[n] > kLimHighSpectralPeak || 86 features.spectral_peak[n] > kLimHighSpectralPeak ||
88 features.log_pitch_gain[n] < kLimLowLogPitchGain) { 87 features.log_pitch_gain[n] < kLimLowLogPitchGain) {
89 pdf_features_given_voice = kEps * pdf_features_given_noise; 88 pdf_features_given_voice = kEps * pdf_features_given_noise;
90 } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) { 89 } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
91 pdf_features_given_noise = kEps * pdf_features_given_voice; 90 pdf_features_given_noise = kEps * pdf_features_given_voice;
92 } 91 }
93 92
94 p = p_prior_ * pdf_features_given_voice / 93 p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
95 (pdf_features_given_voice * p_prior_ + 94 p_prior_ + pdf_features_given_noise * (1 - p_prior_));
96 pdf_features_given_noise * (1 - p_prior_));
97 95
98 p = LimitProbability(p); 96 p = LimitProbability(p);
99 97
100 // Combine pitch-based probability with standalone probability, before 98 // Combine pitch-based probability with standalone probability, before
101 // updating prior probabilities. 99 // updating prior probabilities.
102 double prod_active = p * p_combined[n]; 100 double prod_active = p * p_combined[n];
103 double prod_inactive = (1 - p) * (1 - p_combined[n]); 101 double prod_inactive = (1 - p) * (1 - p_combined[n]);
104 p_combined[n] = prod_active / (prod_active + prod_inactive); 102 p_combined[n] = prod_active / (prod_active + prod_inactive);
105 103
106 if (UpdatePrior(p_combined[n]) < 0) 104 if (UpdatePrior(p_combined[n]) < 0)
107 return -1; 105 return -1;
108 // Limit prior probability. With a zero prior probability the posterior 106 // Limit prior probability. With a zero prior probability the posterior
109 // probability is always zero. 107 // probability is always zero.
110 p_prior_ = LimitProbability(p_prior_); 108 p_prior_ = LimitProbability(p_prior_);
111 } 109 }
112 return 0; 110 return 0;
113 } 111 }
114 112
115 int PitchBasedVad::UpdatePrior(double p) { 113 int PitchBasedVad::UpdatePrior(double p) {
116 circular_buffer_->Insert(p); 114 circular_buffer_->Insert(p);
117 if (circular_buffer_->RemoveTransient(kTransientWidthThreshold, 115 if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
118 kLowProbabilityThreshold) < 0) 116 kLowProbabilityThreshold) < 0)
119 return -1; 117 return -1;
120 p_prior_ = circular_buffer_->Mean(); 118 p_prior_ = circular_buffer_->Mean();
121 return 0; 119 return 0;
122 } 120 }
123 121
124 } // namespace webrtc 122 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/agc/pitch_based_vad.h ('k') | webrtc/modules/audio_processing/agc/pitch_based_vad_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698