OLD | NEW |
| (Empty) |
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 | |
11 #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" | |
12 | |
13 #include <assert.h> | |
14 #include <math.h> | |
15 #include <string.h> | |
16 | |
17 #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h" | |
18 #include "webrtc/modules/audio_processing/vad/common.h" | |
19 #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h" | |
20 #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h" | |
21 #include "webrtc/modules/interface/module_common_types.h" | |
22 | |
23 namespace webrtc { | |
24 | |
25 static_assert(kNoiseGmmDim == kVoiceGmmDim, | |
26 "noise and voice gmm dimension not equal"); | |
27 | |
// These values should match MATLAB counterparts for unit-tests to pass.

// Capacity requested for the circular buffer of posterior probabilities.
static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
// Value the prior voicing probability starts from at construction.
static const double kInitialPriorProbability = 0.3;
// Width threshold handed to VadCircularBuffer::RemoveTransient().
static const int kTransientWidthThreshold = 7;
// Probability threshold handed to VadCircularBuffer::RemoveTransient().
static const double kLowProbabilityThreshold = 0.2;
33 | |
// Clamps a probability to the closed interval [0.01, 0.99].
//
// Values above 0.99 become 0.99, values below 0.01 become 0.01, and anything
// in between is returned unchanged. A NaN input fails both comparisons and is
// therefore returned as NaN.
static double LimitProbability(double p) {
  const double kLimHigh = 0.99;
  const double kLimLow = 0.01;

  if (p > kLimHigh)
    return kLimHigh;
  if (p < kLimLow)
    return kLimLow;
  return p;
}
44 | |
45 PitchBasedVad::PitchBasedVad() | |
46 : p_prior_(kInitialPriorProbability), | |
47 circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { | |
48 // Setup noise GMM. | |
49 noise_gmm_.dimension = kNoiseGmmDim; | |
50 noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; | |
51 noise_gmm_.weight = kNoiseGmmWeights; | |
52 noise_gmm_.mean = &kNoiseGmmMean[0][0]; | |
53 noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; | |
54 | |
55 // Setup voice GMM. | |
56 voice_gmm_.dimension = kVoiceGmmDim; | |
57 voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; | |
58 voice_gmm_.weight = kVoiceGmmWeights; | |
59 voice_gmm_.mean = &kVoiceGmmMean[0][0]; | |
60 voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; | |
61 } | |
62 | |
63 PitchBasedVad::~PitchBasedVad() { | |
64 } | |
65 | |
66 int PitchBasedVad::VoicingProbability(const AudioFeatures& features, | |
67 double* p_combined) { | |
68 double p; | |
69 double gmm_features[3]; | |
70 double pdf_features_given_voice; | |
71 double pdf_features_given_noise; | |
72 // These limits are the same in matlab implementation 'VoicingProbGMM().' | |
73 const double kLimLowLogPitchGain = -2.0; | |
74 const double kLimHighLogPitchGain = -0.9; | |
75 const double kLimLowSpectralPeak = 200; | |
76 const double kLimHighSpectralPeak = 2000; | |
77 const double kEps = 1e-12; | |
78 for (int n = 0; n < features.num_frames; n++) { | |
79 gmm_features[0] = features.log_pitch_gain[n]; | |
80 gmm_features[1] = features.spectral_peak[n]; | |
81 gmm_features[2] = features.pitch_lag_hz[n]; | |
82 | |
83 pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_); | |
84 pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_); | |
85 | |
86 if (features.spectral_peak[n] < kLimLowSpectralPeak || | |
87 features.spectral_peak[n] > kLimHighSpectralPeak || | |
88 features.log_pitch_gain[n] < kLimLowLogPitchGain) { | |
89 pdf_features_given_voice = kEps * pdf_features_given_noise; | |
90 } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) { | |
91 pdf_features_given_noise = kEps * pdf_features_given_voice; | |
92 } | |
93 | |
94 p = p_prior_ * pdf_features_given_voice / | |
95 (pdf_features_given_voice * p_prior_ + | |
96 pdf_features_given_noise * (1 - p_prior_)); | |
97 | |
98 p = LimitProbability(p); | |
99 | |
100 // Combine pitch-based probability with standalone probability, before | |
101 // updating prior probabilities. | |
102 double prod_active = p * p_combined[n]; | |
103 double prod_inactive = (1 - p) * (1 - p_combined[n]); | |
104 p_combined[n] = prod_active / (prod_active + prod_inactive); | |
105 | |
106 if (UpdatePrior(p_combined[n]) < 0) | |
107 return -1; | |
108 // Limit prior probability. With a zero prior probability the posterior | |
109 // probability is always zero. | |
110 p_prior_ = LimitProbability(p_prior_); | |
111 } | |
112 return 0; | |
113 } | |
114 | |
115 int PitchBasedVad::UpdatePrior(double p) { | |
116 circular_buffer_->Insert(p); | |
117 if (circular_buffer_->RemoveTransient(kTransientWidthThreshold, | |
118 kLowProbabilityThreshold) < 0) | |
119 return -1; | |
120 p_prior_ = circular_buffer_->Mean(); | |
121 return 0; | |
122 } | |
123 | |
124 } // namespace webrtc | |
OLD | NEW |