OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 #include <stdlib.h> | 12 #include <stdlib.h> |
13 | 13 |
14 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 14 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
15 | 15 |
16 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. | 16 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. |
17 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length) { | 17 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { |
18 int absolute = 0, maximum = 0; | 18 int absolute = 0, maximum = 0; |
19 | 19 |
20 if (vector == NULL || length <= 0) { | 20 if (vector == NULL || length == 0) { |
21 return -1; | 21 return -1; |
22 } | 22 } |
23 | 23 |
24 const int16_t* p_start = vector; | 24 const int16_t* p_start = vector; |
25 int rest = length & 7; | 25 size_t rest = length & 7; |
26 const int16_t* p_end = vector + length - rest; | 26 const int16_t* p_end = vector + length - rest; |
27 | 27 |
28 int16x8_t v; | 28 int16x8_t v; |
29 uint16x8_t max_qv; | 29 uint16x8_t max_qv; |
30 max_qv = vdupq_n_u16(0); | 30 max_qv = vdupq_n_u16(0); |
31 | 31 |
32 while (p_start < p_end) { | 32 while (p_start < p_end) { |
33 v = vld1q_s16(p_start); | 33 v = vld1q_s16(p_start); |
34 // Note vabs doesn't change the value of -32768. | 34 // Note vabs doesn't change the value of -32768. |
35 v = vabsq_s16(v); | 35 v = vabsq_s16(v); |
(...skipping 26 matching lines...) Expand all Loading... |
62 // Guard the case for abs(-32768). | 62 // Guard the case for abs(-32768). |
63 if (maximum > WEBRTC_SPL_WORD16_MAX) { | 63 if (maximum > WEBRTC_SPL_WORD16_MAX) { |
64 maximum = WEBRTC_SPL_WORD16_MAX; | 64 maximum = WEBRTC_SPL_WORD16_MAX; |
65 } | 65 } |
66 | 66 |
67 return (int16_t)maximum; | 67 return (int16_t)maximum; |
68 } | 68 } |
69 | 69 |
70 // Maximum absolute value of word32 vector. NEON intrinsics version for | 70 // Maximum absolute value of word32 vector. NEON intrinsics version for |
71 // ARM 32-bit/64-bit platforms. | 71 // ARM 32-bit/64-bit platforms. |
72 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length) { | 72 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { |
73 // Use uint32_t for the local variables, to accommodate the return value | 73 // Use uint32_t for the local variables, to accommodate the return value |
74 // of abs(0x80000000), which is 0x80000000. | 74 // of abs(0x80000000), which is 0x80000000. |
75 | 75 |
76 uint32_t absolute = 0, maximum = 0; | 76 uint32_t absolute = 0, maximum = 0; |
77 int i = 0; | 77 size_t i = 0; |
78 int residual = length & 0x7; | 78 size_t residual = length & 0x7; |
79 | 79 |
80 if (vector == NULL || length <= 0) { | 80 if (vector == NULL || length == 0) { |
81 return -1; | 81 return -1; |
82 } | 82 } |
83 | 83 |
84 const int32_t* p_start = vector; | 84 const int32_t* p_start = vector; |
85 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
86 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
87 | 87 |
88 // First part, unroll the loop 8 times. | 88 // First part, unroll the loop 8 times. |
89 for (i = 0; i < length - residual; i += 8) { | 89 for (i = 0; i < length - residual; i += 8) { |
90 int32x4_t in32x4_0 = vld1q_s32(p_start); | 90 int32x4_t in32x4_0 = vld1q_s32(p_start); |
(...skipping 28 matching lines...) Expand all Loading... |
119 } | 119 } |
120 | 120 |
121 // Guard against the case for 0x80000000. | 121 // Guard against the case for 0x80000000. |
122 maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); | 122 maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); |
123 | 123 |
124 return (int32_t)maximum; | 124 return (int32_t)maximum; |
125 } | 125 } |
126 | 126 |
127 // Maximum value of word16 vector. NEON intrinsics version for | 127 // Maximum value of word16 vector. NEON intrinsics version for |
128 // ARM 32-bit/64-bit platforms. | 128 // ARM 32-bit/64-bit platforms. |
129 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length) { | 129 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { |
130 int16_t maximum = WEBRTC_SPL_WORD16_MIN; | 130 int16_t maximum = WEBRTC_SPL_WORD16_MIN; |
131 int i = 0; | 131 size_t i = 0; |
132 int residual = length & 0x7; | 132 size_t residual = length & 0x7; |
133 | 133 |
134 if (vector == NULL || length <= 0) { | 134 if (vector == NULL || length == 0) { |
135 return maximum; | 135 return maximum; |
136 } | 136 } |
137 | 137 |
138 const int16_t* p_start = vector; | 138 const int16_t* p_start = vector; |
139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
140 | 140 |
141 // First part, unroll the loop 8 times. | 141 // First part, unroll the loop 8 times. |
142 for (i = 0; i < length - residual; i += 8) { | 142 for (i = 0; i < length - residual; i += 8) { |
143 int16x8_t in16x8 = vld1q_s16(p_start); | 143 int16x8_t in16x8 = vld1q_s16(p_start); |
144 max16x8 = vmaxq_s16(max16x8, in16x8); | 144 max16x8 = vmaxq_s16(max16x8, in16x8); |
(...skipping 14 matching lines...) Expand all Loading... |
159 for (i = residual; i > 0; i--) { | 159 for (i = residual; i > 0; i--) { |
160 if (*p_start > maximum) | 160 if (*p_start > maximum) |
161 maximum = *p_start; | 161 maximum = *p_start; |
162 p_start++; | 162 p_start++; |
163 } | 163 } |
164 return maximum; | 164 return maximum; |
165 } | 165 } |
166 | 166 |
167 // Maximum value of word32 vector. NEON intrinsics version for | 167 // Maximum value of word32 vector. NEON intrinsics version for |
168 // ARM 32-bit/64-bit platforms. | 168 // ARM 32-bit/64-bit platforms. |
169 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length) { | 169 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { |
170 int32_t maximum = WEBRTC_SPL_WORD32_MIN; | 170 int32_t maximum = WEBRTC_SPL_WORD32_MIN; |
171 int i = 0; | 171 size_t i = 0; |
172 int residual = length & 0x7; | 172 size_t residual = length & 0x7; |
173 | 173 |
174 if (vector == NULL || length <= 0) { | 174 if (vector == NULL || length == 0) { |
175 return maximum; | 175 return maximum; |
176 } | 176 } |
177 | 177 |
178 const int32_t* p_start = vector; | 178 const int32_t* p_start = vector; |
179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
181 | 181 |
182 // First part, unroll the loop 8 times. | 182 // First part, unroll the loop 8 times. |
183 for (i = 0; i < length - residual; i += 8) { | 183 for (i = 0; i < length - residual; i += 8) { |
184 int32x4_t in32x4_0 = vld1q_s32(p_start); | 184 int32x4_t in32x4_0 = vld1q_s32(p_start); |
(...skipping 18 matching lines...) Expand all Loading... |
203 for (i = residual; i > 0; i--) { | 203 for (i = residual; i > 0; i--) { |
204 if (*p_start > maximum) | 204 if (*p_start > maximum) |
205 maximum = *p_start; | 205 maximum = *p_start; |
206 p_start++; | 206 p_start++; |
207 } | 207 } |
208 return maximum; | 208 return maximum; |
209 } | 209 } |
210 | 210 |
211 // Minimum value of word16 vector. NEON intrinsics version for | 211 // Minimum value of word16 vector. NEON intrinsics version for |
212 // ARM 32-bit/64-bit platforms. | 212 // ARM 32-bit/64-bit platforms. |
213 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length) { | 213 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { |
214 int16_t minimum = WEBRTC_SPL_WORD16_MAX; | 214 int16_t minimum = WEBRTC_SPL_WORD16_MAX; |
215 int i = 0; | 215 size_t i = 0; |
216 int residual = length & 0x7; | 216 size_t residual = length & 0x7; |
217 | 217 |
218 if (vector == NULL || length <= 0) { | 218 if (vector == NULL || length == 0) { |
219 return minimum; | 219 return minimum; |
220 } | 220 } |
221 | 221 |
222 const int16_t* p_start = vector; | 222 const int16_t* p_start = vector; |
223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
224 | 224 |
225 // First part, unroll the loop 8 times. | 225 // First part, unroll the loop 8 times. |
226 for (i = 0; i < length - residual; i += 8) { | 226 for (i = 0; i < length - residual; i += 8) { |
227 int16x8_t in16x8 = vld1q_s16(p_start); | 227 int16x8_t in16x8 = vld1q_s16(p_start); |
228 min16x8 = vminq_s16(min16x8, in16x8); | 228 min16x8 = vminq_s16(min16x8, in16x8); |
(...skipping 14 matching lines...) Expand all Loading... |
243 for (i = residual; i > 0; i--) { | 243 for (i = residual; i > 0; i--) { |
244 if (*p_start < minimum) | 244 if (*p_start < minimum) |
245 minimum = *p_start; | 245 minimum = *p_start; |
246 p_start++; | 246 p_start++; |
247 } | 247 } |
248 return minimum; | 248 return minimum; |
249 } | 249 } |
250 | 250 |
251 // Minimum value of word32 vector. NEON intrinsics version for | 251 // Minimum value of word32 vector. NEON intrinsics version for |
252 // ARM 32-bit/64-bit platforms. | 252 // ARM 32-bit/64-bit platforms. |
253 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length) { | 253 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { |
254 int32_t minimum = WEBRTC_SPL_WORD32_MAX; | 254 int32_t minimum = WEBRTC_SPL_WORD32_MAX; |
255 int i = 0; | 255 size_t i = 0; |
256 int residual = length & 0x7; | 256 size_t residual = length & 0x7; |
257 | 257 |
258 if (vector == NULL || length <= 0) { | 258 if (vector == NULL || length == 0) { |
259 return minimum; | 259 return minimum; |
260 } | 260 } |
261 | 261 |
262 const int32_t* p_start = vector; | 262 const int32_t* p_start = vector; |
263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
265 | 265 |
266 // First part, unroll the loop 8 times. | 266 // First part, unroll the loop 8 times. |
267 for (i = 0; i < length - residual; i += 8) { | 267 for (i = 0; i < length - residual; i += 8) { |
268 int32x4_t in32x4_0 = vld1q_s32(p_start); | 268 int32x4_t in32x4_0 = vld1q_s32(p_start); |
(...skipping 16 matching lines...) Expand all Loading... |
285 | 285 |
286 // Second part, do the remaining iterations (if any). | 286 // Second part, do the remaining iterations (if any). |
287 for (i = residual; i > 0; i--) { | 287 for (i = residual; i > 0; i--) { |
288 if (*p_start < minimum) | 288 if (*p_start < minimum) |
289 minimum = *p_start; | 289 minimum = *p_start; |
290 p_start++; | 290 p_start++; |
291 } | 291 } |
292 return minimum; | 292 return minimum; |
293 } | 293 } |
294 | 294 |
OLD | NEW |