OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 | 11 |
12 /* | 12 /* |
13 * This header file includes the descriptions of the core VAD calls. | 13 * This header file includes the descriptions of the core VAD calls. |
14 */ | 14 */ |
15 | 15 |
16 #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ | 16 #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |
17 #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ | 17 #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |
18 | 18 |
19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
20 #include "webrtc/typedefs.h" | 20 #include "webrtc/typedefs.h" |
21 | 21 |
22 enum { kNumChannels = 6 }; // Number of frequency bands (named channels). | 22 enum { kNumChannels = 6 }; // Number of frequency bands (named channels). |
23 enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. | 23 enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. |
24 enum { kTableSize = kNumChannels * kNumGaussians }; | 24 enum { kTableSize = kNumChannels * kNumGaussians }; |
25 enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. | 25 enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. |
26 | 26 |
27 typedef struct VadInstT_ { | 27 typedef struct VadInstT_ |
| 28 { |
| 29 |
28 int vad; | 30 int vad; |
29 int32_t downsampling_filter_states[4]; | 31 int32_t downsampling_filter_states[4]; |
30 WebRtcSpl_State48khzTo8khz state_48_to_8; | 32 WebRtcSpl_State48khzTo8khz state_48_to_8; |
31 int16_t noise_means[kTableSize]; | 33 int16_t noise_means[kTableSize]; |
32 int16_t speech_means[kTableSize]; | 34 int16_t speech_means[kTableSize]; |
33 int16_t noise_stds[kTableSize]; | 35 int16_t noise_stds[kTableSize]; |
34 int16_t speech_stds[kTableSize]; | 36 int16_t speech_stds[kTableSize]; |
35 // TODO(bjornv): Change to |frame_count|. | 37 // TODO(bjornv): Change to |frame_count|. |
36 int32_t frame_counter; | 38 int32_t frame_counter; |
37 int16_t over_hang; // Over Hang | 39 int16_t over_hang; // Over Hang |
38 int16_t num_of_speech; | 40 int16_t num_of_speech; |
39 // TODO(bjornv): Change to |age_vector|. | 41 // TODO(bjornv): Change to |age_vector|. |
40 int16_t index_vector[16 * kNumChannels]; | 42 int16_t index_vector[16 * kNumChannels]; |
41 int16_t low_value_vector[16 * kNumChannels]; | 43 int16_t low_value_vector[16 * kNumChannels]; |
42 // TODO(bjornv): Change to |median|. | 44 // TODO(bjornv): Change to |median|. |
43 int16_t mean_value[kNumChannels]; | 45 int16_t mean_value[kNumChannels]; |
44 int16_t upper_state[5]; | 46 int16_t upper_state[5]; |
45 int16_t lower_state[5]; | 47 int16_t lower_state[5]; |
46 int16_t hp_filter_state[4]; | 48 int16_t hp_filter_state[4]; |
47 int16_t over_hang_max_1[3]; | 49 int16_t over_hang_max_1[3]; |
48 int16_t over_hang_max_2[3]; | 50 int16_t over_hang_max_2[3]; |
49 int16_t individual[3]; | 51 int16_t individual[3]; |
50 int16_t total[3]; | 52 int16_t total[3]; |
51 | 53 |
52 int init_flag; | 54 int init_flag; |
| 55 |
53 } VadInstT; | 56 } VadInstT; |
54 | 57 |
55 // Initializes the core VAD component. The default aggressiveness mode is | 58 // Initializes the core VAD component. The default aggressiveness mode is |
56 // controlled by |kDefaultMode| in vad_core.c. | 59 // controlled by |kDefaultMode| in vad_core.c. |
57 // | 60 // |
58 // - self [i/o] : Instance that should be initialized | 61 // - self [i/o] : Instance that should be initialized |
59 // | 62 // |
60 // returns : 0 (OK), -1 (null pointer in or if the default mode can't be | 63 // returns : 0 (OK), -1 (null pointer in or if the default mode can't be |
61 // set) | 64 // set) |
62 int WebRtcVad_InitCore(VadInstT* self); | 65 int WebRtcVad_InitCore(VadInstT* self); |
(...skipping 40 matching lines...)
103 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, | 106 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, |
104 size_t frame_length); | 107 size_t frame_length); |
105 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, | 108 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, |
106 size_t frame_length); | 109 size_t frame_length); |
107 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, | 110 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, |
108 size_t frame_length); | 111 size_t frame_length); |
109 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, | 112 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, |
110 size_t frame_length); | 113 size_t frame_length); |
111 | 114 |
112 #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ | 115 #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ |
OLD | NEW |