webrtc/common_audio/vad/vad_filterbank.c - Issue 1227203003: Update audio code to use size_t more correctly, webrtc/common_audio/ portion.

Side by Side Diff: webrtc/common_audio/vad/vad_filterbank.c

Issue 1227203003: Update audio code to use size_t more correctly, webrtc/common_audio/ portion. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Resync Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/common_audio/vad/vad_filterbank.h ('k') | webrtc/common_audio/vad/vad_filterbank_unittest.cc » ('j') | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 20 matching lines...) Expand all Loading...
31 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };	31 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };

32	32

33 // High pass filtering, with a cut-off frequency at 80 Hz, if the \|data_in\| is	33 // High pass filtering, with a cut-off frequency at 80 Hz, if the \|data_in\| is

34 // sampled at 500 Hz.	34 // sampled at 500 Hz.

35 //	35 //

36 // - data_in [i] : Input audio data sampled at 500 Hz.	36 // - data_in [i] : Input audio data sampled at 500 Hz.

37 // - data_length [i] : Length of input and output data.	37 // - data_length [i] : Length of input and output data.

38 // - filter_state [i/o] : State of the filter.	38 // - filter_state [i/o] : State of the filter.

39 // - data_out [o] : Output audio data in the frequency interval	39 // - data_out [o] : Output audio data in the frequency interval

40 // 80 - 250 Hz.	40 // 80 - 250 Hz.

41 static void HighPassFilter(const int16_t* data_in, int data_length,	41 static void HighPassFilter(const int16_t* data_in, size_t data_length,

42 int16_t* filter_state, int16_t* data_out) {	42 int16_t* filter_state, int16_t* data_out) {

43 int i;	43 size_t i;

44 const int16_t* in_ptr = data_in;	44 const int16_t* in_ptr = data_in;

45 int16_t* out_ptr = data_out;	45 int16_t* out_ptr = data_out;

46 int32_t tmp32 = 0;	46 int32_t tmp32 = 0;

47	47

48	48

49 // The sum of the absolute values of the impulse response:	49 // The sum of the absolute values of the impulse response:

50 // The zero/pole-filter has a max amplification of a single sample of: 1.4546	50 // The zero/pole-filter has a max amplification of a single sample of: 1.4546

51 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194	51 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194

52 // The all-zero section has a max amplification of a single sample of: 1.6189	52 // The all-zero section has a max amplification of a single sample of: 1.6189

53 // Impulse response: 0.4047 -0.8094 0.4047 0 0 0	53 // Impulse response: 0.4047 -0.8094 0.4047 0 0 0

(...skipping 19 matching lines...) Expand all Loading...
73	73

74 // All pass filtering of \|data_in\|, used before splitting the signal into two	74 // All pass filtering of \|data_in\|, used before splitting the signal into two

75 // frequency bands (low pass vs high pass).	75 // frequency bands (low pass vs high pass).

76 // Note that \|data_in\| and \|data_out\| can NOT correspond to the same address.	76 // Note that \|data_in\| and \|data_out\| can NOT correspond to the same address.

77 //	77 //

78 // - data_in [i] : Input audio signal given in Q0.	78 // - data_in [i] : Input audio signal given in Q0.

79 // - data_length [i] : Length of input and output data.	79 // - data_length [i] : Length of input and output data.

80 // - filter_coefficient [i] : Given in Q15.	80 // - filter_coefficient [i] : Given in Q15.

81 // - filter_state [i/o] : State of the filter given in Q(-1).	81 // - filter_state [i/o] : State of the filter given in Q(-1).

82 // - data_out [o] : Output audio signal given in Q(-1).	82 // - data_out [o] : Output audio signal given in Q(-1).

83 static void AllPassFilter(const int16_t* data_in, int data_length,	83 static void AllPassFilter(const int16_t* data_in, size_t data_length,

84 int16_t filter_coefficient, int16_t* filter_state,	84 int16_t filter_coefficient, int16_t* filter_state,

85 int16_t* data_out) {	85 int16_t* data_out) {

86 // The filter can only cause overflow (in the w16 output variable)	86 // The filter can only cause overflow (in the w16 output variable)

87 // if more than 4 consecutive input numbers are of maximum value and	87 // if more than 4 consecutive input numbers are of maximum value and

88 // has the the same sign as the impulse responses first taps.	88 // has the the same sign as the impulse responses first taps.

89 // First 6 taps of the impulse response:	89 // First 6 taps of the impulse response:

90 // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990	90 // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990

91	91

92 int i;	92 size_t i;

93 int16_t tmp16 = 0;	93 int16_t tmp16 = 0;

94 int32_t tmp32 = 0;	94 int32_t tmp32 = 0;

95 int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15	95 int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15

96	96

97 for (i = 0; i < data_length; i++) {	97 for (i = 0; i < data_length; i++) {

98 tmp32 = state32 + filter_coefficient * *data_in;	98 tmp32 = state32 + filter_coefficient * *data_in;

99 tmp16 = (int16_t) (tmp32 >> 16); // Q(-1)	99 tmp16 = (int16_t) (tmp32 >> 16); // Q(-1)

100 *data_out++ = tmp16;	100 *data_out++ = tmp16;

101 state32 = (*data_in << 14) - filter_coefficient * tmp16; // Q14	101 state32 = (*data_in << 14) - filter_coefficient * tmp16; // Q14

102 state32 <<= 1; // Q15.	102 state32 <<= 1; // Q15.

103 data_in += 2;	103 data_in += 2;

104 }	104 }

105	105

106 *filter_state = (int16_t) (state32 >> 16); // Q(-1)	106 *filter_state = (int16_t) (state32 >> 16); // Q(-1)

107 }	107 }

108	108

109 // Splits \|data_in\| into \|hp_data_out\| and \|lp_data_out\| corresponding to	109 // Splits \|data_in\| into \|hp_data_out\| and \|lp_data_out\| corresponding to

110 // an upper (high pass) part and a lower (low pass) part respectively.	110 // an upper (high pass) part and a lower (low pass) part respectively.

111 //	111 //

112 // - data_in [i] : Input audio data to be split into two frequency bands.	112 // - data_in [i] : Input audio data to be split into two frequency bands.

113 // - data_length [i] : Length of \|data_in\|.	113 // - data_length [i] : Length of \|data_in\|.

114 // - upper_state [i/o] : State of the upper filter, given in Q(-1).	114 // - upper_state [i/o] : State of the upper filter, given in Q(-1).

115 // - lower_state [i/o] : State of the lower filter, given in Q(-1).	115 // - lower_state [i/o] : State of the lower filter, given in Q(-1).

116 // - hp_data_out [o] : Output audio data of the upper half of the spectrum.	116 // - hp_data_out [o] : Output audio data of the upper half of the spectrum.

117 // The length is \|data_length\| / 2.	117 // The length is \|data_length\| / 2.

118 // - lp_data_out [o] : Output audio data of the lower half of the spectrum.	118 // - lp_data_out [o] : Output audio data of the lower half of the spectrum.

119 // The length is \|data_length\| / 2.	119 // The length is \|data_length\| / 2.

120 static void SplitFilter(const int16_t* data_in, int data_length,	120 static void SplitFilter(const int16_t* data_in, size_t data_length,

121 int16_t* upper_state, int16_t* lower_state,	121 int16_t* upper_state, int16_t* lower_state,

122 int16_t* hp_data_out, int16_t* lp_data_out) {	122 int16_t* hp_data_out, int16_t* lp_data_out) {

123 int i;	123 size_t i;

124 int half_length = data_length >> 1; // Downsampling by 2.	124 size_t half_length = data_length >> 1; // Downsampling by 2.

125 int16_t tmp_out;	125 int16_t tmp_out;

126	126

127 // All-pass filtering upper branch.	127 // All-pass filtering upper branch.

128 AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,	128 AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,

129 hp_data_out);	129 hp_data_out);

130	130

131 // All-pass filtering lower branch.	131 // All-pass filtering lower branch.

132 AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,	132 AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,

133 lp_data_out);	133 lp_data_out);

134	134

135 // Make LP and HP signals.	135 // Make LP and HP signals.

136 for (i = 0; i < half_length; i++) {	136 for (i = 0; i < half_length; i++) {

137 tmp_out = *hp_data_out;	137 tmp_out = *hp_data_out;

138 *hp_data_out++ -= *lp_data_out;	138 *hp_data_out++ -= *lp_data_out;

139 *lp_data_out++ += tmp_out;	139 *lp_data_out++ += tmp_out;

140 }	140 }

141 }	141 }

142	142

143 // Calculates the energy of \|data_in\| in dB, and also updates an overall	143 // Calculates the energy of \|data_in\| in dB, and also updates an overall

144 // \|total_energy\| if necessary.	144 // \|total_energy\| if necessary.

145 //	145 //

146 // - data_in [i] : Input audio data for energy calculation.	146 // - data_in [i] : Input audio data for energy calculation.

147 // - data_length [i] : Length of input data.	147 // - data_length [i] : Length of input data.

148 // - offset [i] : Offset value added to \|log_energy\|.	148 // - offset [i] : Offset value added to \|log_energy\|.

149 // - total_energy [i/o] : An external energy updated with the energy of	149 // - total_energy [i/o] : An external energy updated with the energy of

150 // \|data_in\|.	150 // \|data_in\|.

151 // NOTE: \|total_energy\| is only updated if	151 // NOTE: \|total_energy\| is only updated if

152 // \|total_energy\| <= \|kMinEnergy\|.	152 // \|total_energy\| <= \|kMinEnergy\|.

153 // - log_energy [o] : 10 * log10("energy of \|data_in\|") given in Q4.	153 // - log_energy [o] : 10 * log10("energy of \|data_in\|") given in Q4.

154 static void LogOfEnergy(const int16_t* data_in, int data_length,	154 static void LogOfEnergy(const int16_t* data_in, size_t data_length,

155 int16_t offset, int16_t* total_energy,	155 int16_t offset, int16_t* total_energy,

156 int16_t* log_energy) {	156 int16_t* log_energy) {

157 // \|tot_rshifts\| accumulates the number of right shifts performed on \|energy\|.	157 // \|tot_rshifts\| accumulates the number of right shifts performed on \|energy\|.

158 int tot_rshifts = 0;	158 int tot_rshifts = 0;

159 // The \|energy\| will be normalized to 15 bits. We use unsigned integer because	159 // The \|energy\| will be normalized to 15 bits. We use unsigned integer because

160 // we eventually will mask out the fractional part.	160 // we eventually will mask out the fractional part.

161 uint32_t energy = 0;	161 uint32_t energy = 0;

162	162

163 assert(data_in != NULL);	163 assert(data_in != NULL);

164 assert(data_length > 0);	164 assert(data_length > 0);

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
236 // By construction \|energy\| is represented by 15 bits, hence any number of	236 // By construction \|energy\| is represented by 15 bits, hence any number of

237 // right shifted \|energy\| will fit in an int16_t. In addition, adding the	237 // right shifted \|energy\| will fit in an int16_t. In addition, adding the

238 // value to \|total_energy\| is wrap around safe as long as	238 // value to \|total_energy\| is wrap around safe as long as

239 // \|kMinEnergy\| < 8192.	239 // \|kMinEnergy\| < 8192.

240 *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.	240 *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.

241 }	241 }

242 }	242 }

243 }	243 }

244	244

245 int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,	245 int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,

246 int data_length, int16_t* features) {	246 size_t data_length, int16_t* features) {

247 int16_t total_energy = 0;	247 int16_t total_energy = 0;

248 // We expect \|data_length\| to be 80, 160 or 240 samples, which corresponds to	248 // We expect \|data_length\| to be 80, 160 or 240 samples, which corresponds to

249 // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will	249 // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will

250 // have at most 120 samples after the first split and at most 60 samples after	250 // have at most 120 samples after the first split and at most 60 samples after

251 // the second split.	251 // the second split.

252 int16_t hp_120[120], lp_120[120];	252 int16_t hp_120[120], lp_120[120];

253 int16_t hp_60[60], lp_60[60];	253 int16_t hp_60[60], lp_60[60];

254 const int half_data_length = data_length >> 1;	254 const size_t half_data_length = data_length >> 1;

255 int length = half_data_length; // \|data_length\| / 2, corresponds to	255 size_t length = half_data_length; // \|data_length\| / 2, corresponds to

256 // bandwidth = 2000 Hz after downsampling.	256 // bandwidth = 2000 Hz after downsampling.

257	257

258 // Initialize variables for the first SplitFilter().	258 // Initialize variables for the first SplitFilter().

259 int frequency_band = 0;	259 int frequency_band = 0;

260 const int16_t* in_ptr = data_in; // [0 - 4000] Hz.	260 const int16_t* in_ptr = data_in; // [0 - 4000] Hz.

261 int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz.	261 int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz.

262 int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz.	262 int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz.

263	263

264 assert(data_length >= 0);

265 assert(data_length <= 240);	264 assert(data_length <= 240);

266 assert(4 < kNumChannels - 1); // Checking maximum \|frequency_band\|.	265 assert(4 < kNumChannels - 1); // Checking maximum \|frequency_band\|.

267	266

268 // Split at 2000 Hz and downsample.	267 // Split at 2000 Hz and downsample.

269 SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],	268 SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],

270 &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);	269 &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);

271	270

272 // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample.	271 // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample.

273 frequency_band = 1;	272 frequency_band = 1;

274 in_ptr = hp_120; // [2000 - 4000] Hz.	273 in_ptr = hp_120; // [2000 - 4000] Hz.

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
323 LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);	322 LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);

324	323

325 // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.	324 // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.

326 HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);	325 HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);

327	326

328 // Energy in 80 Hz - 250 Hz.	327 // Energy in 80 Hz - 250 Hz.

329 LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);	328 LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);

330	329

331 return total_energy;	330 return total_energy;

332 }	331 }

OLD	NEW