OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 #include <assert.h> |
12 #include <stdlib.h> | 13 #include <stdlib.h> |
13 | 14 |
14 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
15 | 16 |
16 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. | 17 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. |
17 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { | 18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { |
18 int absolute = 0, maximum = 0; | 19 int absolute = 0, maximum = 0; |
19 | 20 |
20 if (vector == NULL || length == 0) { | 21 assert(length > 0); |
21 return -1; | |
22 } | |
23 | 22 |
24 const int16_t* p_start = vector; | 23 const int16_t* p_start = vector; |
25 size_t rest = length & 7; | 24 size_t rest = length & 7; |
26 const int16_t* p_end = vector + length - rest; | 25 const int16_t* p_end = vector + length - rest; |
27 | 26 |
28 int16x8_t v; | 27 int16x8_t v; |
29 uint16x8_t max_qv; | 28 uint16x8_t max_qv; |
30 max_qv = vdupq_n_u16(0); | 29 max_qv = vdupq_n_u16(0); |
31 | 30 |
32 while (p_start < p_end) { | 31 while (p_start < p_end) { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
70 // Maximum absolute value of word32 vector. NEON intrinsics version for | 69 // Maximum absolute value of word32 vector. NEON intrinsics version for |
71 // ARM 32-bit/64-bit platforms. | 70 // ARM 32-bit/64-bit platforms. |
72 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { | 71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { |
73 // Use uint32_t for the local variables, to accommodate the return value | 72 // Use uint32_t for the local variables, to accommodate the return value |
74 // of abs(0x80000000), which is 0x80000000. | 73 // of abs(0x80000000), which is 0x80000000. |
75 | 74 |
76 uint32_t absolute = 0, maximum = 0; | 75 uint32_t absolute = 0, maximum = 0; |
77 size_t i = 0; | 76 size_t i = 0; |
78 size_t residual = length & 0x7; | 77 size_t residual = length & 0x7; |
79 | 78 |
80 if (vector == NULL || length == 0) { | 79 assert(length > 0); |
81 return -1; | |
82 } | |
83 | 80 |
84 const int32_t* p_start = vector; | 81 const int32_t* p_start = vector; |
85 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 82 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
86 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 83 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
87 | 84 |
88 // First part, unroll the loop 8 times. | 85 // First part, unroll the loop 8 times. |
89 for (i = 0; i < length - residual; i += 8) { | 86 for (i = 0; i < length - residual; i += 8) { |
90 int32x4_t in32x4_0 = vld1q_s32(p_start); | 87 int32x4_t in32x4_0 = vld1q_s32(p_start); |
91 p_start += 4; | 88 p_start += 4; |
92 int32x4_t in32x4_1 = vld1q_s32(p_start); | 89 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 return (int32_t)maximum; | 121 return (int32_t)maximum; |
125 } | 122 } |
126 | 123 |
127 // Maximum value of word16 vector. NEON intrinsics version for | 124 // Maximum value of word16 vector. NEON intrinsics version for |
128 // ARM 32-bit/64-bit platforms. | 125 // ARM 32-bit/64-bit platforms. |
129 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { | 126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { |
130 int16_t maximum = WEBRTC_SPL_WORD16_MIN; | 127 int16_t maximum = WEBRTC_SPL_WORD16_MIN; |
131 size_t i = 0; | 128 size_t i = 0; |
132 size_t residual = length & 0x7; | 129 size_t residual = length & 0x7; |
133 | 130 |
134 if (vector == NULL || length == 0) { | 131 assert(length > 0); |
135 return maximum; | |
136 } | |
137 | 132 |
138 const int16_t* p_start = vector; | 133 const int16_t* p_start = vector; |
139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 134 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
140 | 135 |
141 // First part, unroll the loop 8 times. | 136 // First part, unroll the loop 8 times. |
142 for (i = 0; i < length - residual; i += 8) { | 137 for (i = 0; i < length - residual; i += 8) { |
143 int16x8_t in16x8 = vld1q_s16(p_start); | 138 int16x8_t in16x8 = vld1q_s16(p_start); |
144 max16x8 = vmaxq_s16(max16x8, in16x8); | 139 max16x8 = vmaxq_s16(max16x8, in16x8); |
145 p_start += 8; | 140 p_start += 8; |
146 } | 141 } |
(...skipping 17 matching lines...) Expand all Loading... |
164 return maximum; | 159 return maximum; |
165 } | 160 } |
166 | 161 |
167 // Maximum value of word32 vector. NEON intrinsics version for | 162 // Maximum value of word32 vector. NEON intrinsics version for |
168 // ARM 32-bit/64-bit platforms. | 163 // ARM 32-bit/64-bit platforms. |
169 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { | 164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { |
170 int32_t maximum = WEBRTC_SPL_WORD32_MIN; | 165 int32_t maximum = WEBRTC_SPL_WORD32_MIN; |
171 size_t i = 0; | 166 size_t i = 0; |
172 size_t residual = length & 0x7; | 167 size_t residual = length & 0x7; |
173 | 168 |
174 if (vector == NULL || length == 0) { | 169 assert(length > 0); |
175 return maximum; | |
176 } | |
177 | 170 |
178 const int32_t* p_start = vector; | 171 const int32_t* p_start = vector; |
179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 172 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 173 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
181 | 174 |
182 // First part, unroll the loop 8 times. | 175 // First part, unroll the loop 8 times. |
183 for (i = 0; i < length - residual; i += 8) { | 176 for (i = 0; i < length - residual; i += 8) { |
184 int32x4_t in32x4_0 = vld1q_s32(p_start); | 177 int32x4_t in32x4_0 = vld1q_s32(p_start); |
185 p_start += 4; | 178 p_start += 4; |
186 int32x4_t in32x4_1 = vld1q_s32(p_start); | 179 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 21 matching lines...) Expand all Loading... |
208 return maximum; | 201 return maximum; |
209 } | 202 } |
210 | 203 |
211 // Minimum value of word16 vector. NEON intrinsics version for | 204 // Minimum value of word16 vector. NEON intrinsics version for |
212 // ARM 32-bit/64-bit platforms. | 205 // ARM 32-bit/64-bit platforms. |
213 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { | 206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { |
214 int16_t minimum = WEBRTC_SPL_WORD16_MAX; | 207 int16_t minimum = WEBRTC_SPL_WORD16_MAX; |
215 size_t i = 0; | 208 size_t i = 0; |
216 size_t residual = length & 0x7; | 209 size_t residual = length & 0x7; |
217 | 210 |
218 if (vector == NULL || length == 0) { | 211 assert(length > 0); |
219 return minimum; | |
220 } | |
221 | 212 |
222 const int16_t* p_start = vector; | 213 const int16_t* p_start = vector; |
223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 214 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
224 | 215 |
225 // First part, unroll the loop 8 times. | 216 // First part, unroll the loop 8 times. |
226 for (i = 0; i < length - residual; i += 8) { | 217 for (i = 0; i < length - residual; i += 8) { |
227 int16x8_t in16x8 = vld1q_s16(p_start); | 218 int16x8_t in16x8 = vld1q_s16(p_start); |
228 min16x8 = vminq_s16(min16x8, in16x8); | 219 min16x8 = vminq_s16(min16x8, in16x8); |
229 p_start += 8; | 220 p_start += 8; |
230 } | 221 } |
(...skipping 17 matching lines...) Expand all Loading... |
248 return minimum; | 239 return minimum; |
249 } | 240 } |
250 | 241 |
251 // Minimum value of word32 vector. NEON intrinsics version for | 242 // Minimum value of word32 vector. NEON intrinsics version for |
252 // ARM 32-bit/64-bit platforms. | 243 // ARM 32-bit/64-bit platforms. |
253 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { | 244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { |
254 int32_t minimum = WEBRTC_SPL_WORD32_MAX; | 245 int32_t minimum = WEBRTC_SPL_WORD32_MAX; |
255 size_t i = 0; | 246 size_t i = 0; |
256 size_t residual = length & 0x7; | 247 size_t residual = length & 0x7; |
257 | 248 |
258 if (vector == NULL || length == 0) { | 249 assert(length > 0); |
259 return minimum; | |
260 } | |
261 | 250 |
262 const int32_t* p_start = vector; | 251 const int32_t* p_start = vector; |
263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 252 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 253 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
265 | 254 |
266 // First part, unroll the loop 8 times. | 255 // First part, unroll the loop 8 times. |
267 for (i = 0; i < length - residual; i += 8) { | 256 for (i = 0; i < length - residual; i += 8) { |
268 int32x4_t in32x4_0 = vld1q_s32(p_start); | 257 int32x4_t in32x4_0 = vld1q_s32(p_start); |
269 p_start += 4; | 258 p_start += 4; |
270 int32x4_t in32x4_1 = vld1q_s32(p_start); | 259 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 14 matching lines...) Expand all Loading... |
285 | 274 |
286 // Second part, do the remaining iterations (if any). | 275 // Second part, do the remaining iterations (if any). |
287 for (i = residual; i > 0; i--) { | 276 for (i = residual; i > 0; i--) { |
288 if (*p_start < minimum) | 277 if (*p_start < minimum) |
289 minimum = *p_start; | 278 minimum = *p_start; |
290 p_start++; | 279 p_start++; |
291 } | 280 } |
292 return minimum; | 281 return minimum; |
293 } | 282 } |
294 | 283 |
OLD | NEW |