OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 20 matching lines...) Expand all Loading... |
31 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; | 31 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; |
32 | 32 |
33 // High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is | 33 // High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is |
34 // sampled at 500 Hz. | 34 // sampled at 500 Hz. |
35 // | 35 // |
36 // - data_in [i] : Input audio data sampled at 500 Hz. | 36 // - data_in [i] : Input audio data sampled at 500 Hz. |
37 // - data_length [i] : Length of input and output data. | 37 // - data_length [i] : Length of input and output data. |
38 // - filter_state [i/o] : State of the filter. | 38 // - filter_state [i/o] : State of the filter. |
39 // - data_out [o] : Output audio data in the frequency interval | 39 // - data_out [o] : Output audio data in the frequency interval |
40 // 80 - 250 Hz. | 40 // 80 - 250 Hz. |
41 static void HighPassFilter(const int16_t* data_in, int data_length, | 41 static void HighPassFilter(const int16_t* data_in, size_t data_length, |
42 int16_t* filter_state, int16_t* data_out) { | 42 int16_t* filter_state, int16_t* data_out) { |
43 int i; | 43 size_t i; |
44 const int16_t* in_ptr = data_in; | 44 const int16_t* in_ptr = data_in; |
45 int16_t* out_ptr = data_out; | 45 int16_t* out_ptr = data_out; |
46 int32_t tmp32 = 0; | 46 int32_t tmp32 = 0; |
47 | 47 |
48 | 48 |
49 // The sum of the absolute values of the impulse response: | 49 // The sum of the absolute values of the impulse response: |
50 // The zero/pole-filter has a max amplification of a single sample of: 1.4546 | 50 // The zero/pole-filter has a max amplification of a single sample of: 1.4546 |
51 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 | 51 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 |
52 // The all-zero section has a max amplification of a single sample of: 1.6189 | 52 // The all-zero section has a max amplification of a single sample of: 1.6189 |
53 // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 | 53 // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 |
(...skipping 19 matching lines...) Expand all Loading... |
73 | 73 |
74 // All pass filtering of |data_in|, used before splitting the signal into two | 74 // All pass filtering of |data_in|, used before splitting the signal into two |
75 // frequency bands (low pass vs high pass). | 75 // frequency bands (low pass vs high pass). |
76 // Note that |data_in| and |data_out| can NOT correspond to the same address. | 76 // Note that |data_in| and |data_out| can NOT correspond to the same address. |
77 // | 77 // |
78 // - data_in [i] : Input audio signal given in Q0. | 78 // - data_in [i] : Input audio signal given in Q0. |
79 // - data_length [i] : Length of input and output data. | 79 // - data_length [i] : Length of input and output data. |
80 // - filter_coefficient [i] : Given in Q15. | 80 // - filter_coefficient [i] : Given in Q15. |
81 // - filter_state [i/o] : State of the filter given in Q(-1). | 81 // - filter_state [i/o] : State of the filter given in Q(-1). |
82 // - data_out [o] : Output audio signal given in Q(-1). | 82 // - data_out [o] : Output audio signal given in Q(-1). |
static void AllPassFilter(const int16_t* data_in, size_t data_length,
                          int16_t filter_coefficient, int16_t* filter_state,
                          int16_t* data_out) {
  // The filter can only cause overflow (in the w16 output variable)
  // if more than 4 consecutive input numbers are of maximum value and
  // have the same sign as the impulse response's first taps.
  // First 6 taps of the impulse response:
  // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990

  size_t i;
  int16_t tmp16 = 0;
  int32_t tmp32 = 0;
  // Scaling is done by multiplication instead of left shifts, since
  // left-shifting a negative signed value is undefined behavior in C.
  int32_t state32 = (int32_t) (*filter_state) * (1 << 16);  // Q15

  for (i = 0; i < data_length; i++) {
    // Only every second input sample is consumed; |data_length| output samples
    // are produced. Indexing (rather than advancing |data_in| by 2) avoids
    // forming a pointer beyond one-past-the-end of the input after the last
    // sample has been read.
    const int16_t sample = data_in[2 * i];

    tmp32 = state32 + filter_coefficient * sample;
    tmp16 = (int16_t) (tmp32 >> 16);  // Q(-1)
    data_out[i] = tmp16;
    state32 = (sample * (1 << 14)) - filter_coefficient * tmp16;  // Q14
    state32 *= 2;  // Q15.
  }

  // Store the state for the next call.
  *filter_state = (int16_t) (state32 >> 16);  // Q(-1)
}
108 | 108 |
109 // Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to | 109 // Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to |
110 // an upper (high pass) part and a lower (low pass) part respectively. | 110 // an upper (high pass) part and a lower (low pass) part respectively. |
111 // | 111 // |
112 // - data_in [i] : Input audio data to be split into two frequency bands. | 112 // - data_in [i] : Input audio data to be split into two frequency bands. |
113 // - data_length [i] : Length of |data_in|. | 113 // - data_length [i] : Length of |data_in|. |
114 // - upper_state [i/o] : State of the upper filter, given in Q(-1). | 114 // - upper_state [i/o] : State of the upper filter, given in Q(-1). |
115 // - lower_state [i/o] : State of the lower filter, given in Q(-1). | 115 // - lower_state [i/o] : State of the lower filter, given in Q(-1). |
116 // - hp_data_out [o] : Output audio data of the upper half of the spectrum. | 116 // - hp_data_out [o] : Output audio data of the upper half of the spectrum. |
117 // The length is |data_length| / 2. | 117 // The length is |data_length| / 2. |
118 // - lp_data_out [o] : Output audio data of the lower half of the spectrum. | 118 // - lp_data_out [o] : Output audio data of the lower half of the spectrum. |
119 // The length is |data_length| / 2. | 119 // The length is |data_length| / 2. |
120 static void SplitFilter(const int16_t* data_in, int data_length, | 120 static void SplitFilter(const int16_t* data_in, size_t data_length, |
121 int16_t* upper_state, int16_t* lower_state, | 121 int16_t* upper_state, int16_t* lower_state, |
122 int16_t* hp_data_out, int16_t* lp_data_out) { | 122 int16_t* hp_data_out, int16_t* lp_data_out) { |
123 int i; | 123 size_t i; |
124 int half_length = data_length >> 1; // Downsampling by 2. | 124 size_t half_length = data_length >> 1; // Downsampling by 2. |
125 int16_t tmp_out; | 125 int16_t tmp_out; |
126 | 126 |
127 // All-pass filtering upper branch. | 127 // All-pass filtering upper branch. |
128 AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, | 128 AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, |
129 hp_data_out); | 129 hp_data_out); |
130 | 130 |
131 // All-pass filtering lower branch. | 131 // All-pass filtering lower branch. |
132 AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, | 132 AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, |
133 lp_data_out); | 133 lp_data_out); |
134 | 134 |
135 // Make LP and HP signals. | 135 // Make LP and HP signals. |
136 for (i = 0; i < half_length; i++) { | 136 for (i = 0; i < half_length; i++) { |
137 tmp_out = *hp_data_out; | 137 tmp_out = *hp_data_out; |
138 *hp_data_out++ -= *lp_data_out; | 138 *hp_data_out++ -= *lp_data_out; |
139 *lp_data_out++ += tmp_out; | 139 *lp_data_out++ += tmp_out; |
140 } | 140 } |
141 } | 141 } |
142 | 142 |
143 // Calculates the energy of |data_in| in dB, and also updates an overall | 143 // Calculates the energy of |data_in| in dB, and also updates an overall |
144 // |total_energy| if necessary. | 144 // |total_energy| if necessary. |
145 // | 145 // |
146 // - data_in [i] : Input audio data for energy calculation. | 146 // - data_in [i] : Input audio data for energy calculation. |
147 // - data_length [i] : Length of input data. | 147 // - data_length [i] : Length of input data. |
148 // - offset [i] : Offset value added to |log_energy|. | 148 // - offset [i] : Offset value added to |log_energy|. |
149 // - total_energy [i/o] : An external energy updated with the energy of | 149 // - total_energy [i/o] : An external energy updated with the energy of |
150 // |data_in|. | 150 // |data_in|. |
151 // NOTE: |total_energy| is only updated if | 151 // NOTE: |total_energy| is only updated if |
152 // |total_energy| <= |kMinEnergy|. | 152 // |total_energy| <= |kMinEnergy|. |
153 // - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4. | 153 // - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4. |
154 static void LogOfEnergy(const int16_t* data_in, int data_length, | 154 static void LogOfEnergy(const int16_t* data_in, size_t data_length, |
155 int16_t offset, int16_t* total_energy, | 155 int16_t offset, int16_t* total_energy, |
156 int16_t* log_energy) { | 156 int16_t* log_energy) { |
157 // |tot_rshifts| accumulates the number of right shifts performed on |energy|. | 157 // |tot_rshifts| accumulates the number of right shifts performed on |energy|. |
158 int tot_rshifts = 0; | 158 int tot_rshifts = 0; |
159 // The |energy| will be normalized to 15 bits. We use unsigned integer because | 159 // The |energy| will be normalized to 15 bits. We use unsigned integer because |
160 // we eventually will mask out the fractional part. | 160 // we eventually will mask out the fractional part. |
161 uint32_t energy = 0; | 161 uint32_t energy = 0; |
162 | 162 |
163 assert(data_in != NULL); | 163 assert(data_in != NULL); |
164 assert(data_length > 0); | 164 assert(data_length > 0); |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
236 // By construction |energy| is represented by 15 bits, hence any number of | 236 // By construction |energy| is represented by 15 bits, hence any number of |
237 // right shifted |energy| will fit in an int16_t. In addition, adding the | 237 // right shifted |energy| will fit in an int16_t. In addition, adding the |
238 // value to |total_energy| is wrap around safe as long as | 238 // value to |total_energy| is wrap around safe as long as |
239 // |kMinEnergy| < 8192. | 239 // |kMinEnergy| < 8192. |
240 *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. | 240 *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. |
241 } | 241 } |
242 } | 242 } |
243 } | 243 } |
244 | 244 |
245 int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, | 245 int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, |
246 int data_length, int16_t* features) { | 246 size_t data_length, int16_t* features) { |
247 int16_t total_energy = 0; | 247 int16_t total_energy = 0; |
248 // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to | 248 // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to |
249 // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will | 249 // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will |
250 // have at most 120 samples after the first split and at most 60 samples after | 250 // have at most 120 samples after the first split and at most 60 samples after |
251 // the second split. | 251 // the second split. |
252 int16_t hp_120[120], lp_120[120]; | 252 int16_t hp_120[120], lp_120[120]; |
253 int16_t hp_60[60], lp_60[60]; | 253 int16_t hp_60[60], lp_60[60]; |
254 const int half_data_length = data_length >> 1; | 254 const size_t half_data_length = data_length >> 1; |
255 int length = half_data_length; // |data_length| / 2, corresponds to | 255 size_t length = half_data_length; // |data_length| / 2, corresponds to |
256 // bandwidth = 2000 Hz after downsampling. | 256 // bandwidth = 2000 Hz after downsampling. |
257 | 257 |
258 // Initialize variables for the first SplitFilter(). | 258 // Initialize variables for the first SplitFilter(). |
259 int frequency_band = 0; | 259 int frequency_band = 0; |
260 const int16_t* in_ptr = data_in; // [0 - 4000] Hz. | 260 const int16_t* in_ptr = data_in; // [0 - 4000] Hz. |
261 int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. | 261 int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. |
262 int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. | 262 int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. |
263 | 263 |
264 assert(data_length >= 0); | |
265 assert(data_length <= 240); | 264 assert(data_length <= 240); |
266 assert(4 < kNumChannels - 1); // Checking maximum |frequency_band|. | 265 assert(4 < kNumChannels - 1); // Checking maximum |frequency_band|. |
267 | 266 |
268 // Split at 2000 Hz and downsample. | 267 // Split at 2000 Hz and downsample. |
269 SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], | 268 SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], |
270 &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); | 269 &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); |
271 | 270 |
272 // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. | 271 // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. |
273 frequency_band = 1; | 272 frequency_band = 1; |
274 in_ptr = hp_120; // [2000 - 4000] Hz. | 273 in_ptr = hp_120; // [2000 - 4000] Hz. |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
323 LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); | 322 LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); |
324 | 323 |
325 // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. | 324 // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. |
326 HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); | 325 HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); |
327 | 326 |
328 // Energy in 80 Hz - 250 Hz. | 327 // Energy in 80 Hz - 250 Hz. |
329 LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); | 328 LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); |
330 | 329 |
331 return total_energy; | 330 return total_energy; |
332 } | 331 } |
OLD | NEW |