OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 12 matching lines...) Expand all Loading... | |
23 class VadAudioProc { | 23 class VadAudioProc { |
24 public: | 24 public: |
25 // Forward declare iSAC structs. | 25 // Forward declare iSAC structs. |
26 struct PitchAnalysisStruct; | 26 struct PitchAnalysisStruct; |
27 struct PreFiltBankstr; | 27 struct PreFiltBankstr; |
28 | 28 |
29 VadAudioProc(); | 29 VadAudioProc(); |
30 ~VadAudioProc(); | 30 ~VadAudioProc(); |
31 | 31 |
32 int ExtractFeatures(const int16_t* audio_frame, | 32 int ExtractFeatures(const int16_t* audio_frame, |
33 int length, | 33 size_t length, |
34 AudioFeatures* audio_features); | 34 AudioFeatures* audio_features); |
35 | 35 |
36 static const int kDftSize = 512; | 36 static const size_t kDftSize = 512; |
37 | 37 |
38 private: | 38 private: |
39 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, int length); | 39 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length); |
40 void SubframeCorrelation(double* corr, int length_corr, int subframe_index); | 40 void SubframeCorrelation(double* corr, |
41 void GetLpcPolynomials(double* lpc, int length_lpc); | 41 size_t length_corr, |
42 void FindFirstSpectralPeaks(double* f_peak, int length_f_peak); | 42 size_t subframe_index); |
43 void Rms(double* rms, int length_rms); | 43 void GetLpcPolynomials(double* lpc, size_t length_lpc); |
44 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak); | |
45 void Rms(double* rms, size_t length_rms); | |
44 void ResetBuffer(); | 46 void ResetBuffer(); |
45 | 47 |
46 // To compute spectral peak we perform LPC analysis to get spectral envelope. | 48 // To compute spectral peak we perform LPC analysis to get spectral envelope. |
47 // For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses. | 49 // For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses. |
48 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame | 50 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame |
49 // we need 5 ms of past signal to create the input of LPC analysis. | 51 // we need 5 ms of past signal to create the input of LPC analysis. |
50 static const int kNumPastSignalSamples = kSampleRateHz / 200; | 52 static const size_t kNumPastSignalSamples = |
53 static_cast<size_t>(kSampleRateHz / 200); | |
51 | 54 |
52 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that | 55 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that |
53 // all the code recognizes it as "no-error." | 56 // all the code recognizes it as "no-error." |
54 static const int kNoError = 0; | 57 static const int kNoError = 0; |
55 | 58 |
56 static const int kNum10msSubframes = 3; | 59 static const size_t kNum10msSubframes = 3; |
57 static const int kNumSubframeSamples = kSampleRateHz / 100; | 60 static const size_t kNumSubframeSamples = |
58 static const int kNumSamplesToProcess = | 61 static_cast<size_t>(kSampleRateHz / 100); |
62 static const size_t kNumSamplesToProcess = | |
59 kNum10msSubframes * | 63 kNum10msSubframes * |
60 kNumSubframeSamples; // Samples in 30 ms @ given sampling rate. | 64 kNumSubframeSamples; // Samples in 30 ms @ given sampling rate. |
61 static const int kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess; | 65 static const size_t kBufferLength = |
62 static const int kIpLength = kDftSize >> 1; | 66 kNumPastSignalSamples + kNumSamplesToProcess; |
63 static const int kWLength = kDftSize >> 1; | 67 static const size_t kIpLength = kDftSize >> 1; |
68 static const size_t kWLength = kDftSize >> 1; | |
64 | 69 |
65 static const int kLpcOrder = 16; | 70 static const size_t kLpcOrder = 16; |
66 | 71 |
67 int ip_[kIpLength]; | 72 size_t ip_[kIpLength]; |
aluebs-webrtc
2015/07/17 01:04:42
This is a helper array for the fft4g, which is act
Peter Kasting
2015/07/17 18:57:40
This is changing in sync with that -- see https://
aluebs-webrtc
2015/07/17 23:16:58
Oh, I see. I was not aware of that. Then it looks
| |
68 float w_fft_[kWLength]; | 73 float w_fft_[kWLength]; |
69 | 74 |
70 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame). | 75 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame). |
71 float audio_buffer_[kBufferLength]; | 76 float audio_buffer_[kBufferLength]; |
72 int num_buffer_samples_; | 77 size_t num_buffer_samples_; |
73 | 78 |
74 double log_old_gain_; | 79 double log_old_gain_; |
75 double old_lag_; | 80 double old_lag_; |
76 | 81 |
77 rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_; | 82 rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_; |
78 rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_; | 83 rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_; |
79 rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_; | 84 rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_; |
80 }; | 85 }; |
81 | 86 |
82 } // namespace webrtc | 87 } // namespace webrtc |
83 | 88 |
84 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ | 89 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ |
OLD | NEW |