OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
79 | 79 |
80 if (vector == NULL || length <= 0) { | 80 if (vector == NULL || length <= 0) { |
81 return -1; | 81 return -1; |
82 } | 82 } |
83 | 83 |
84 const int32_t* p_start = vector; | 84 const int32_t* p_start = vector; |
85 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
86 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
87 | 87 |
88 // First part, unroll the loop 8 times. | 88 // First part, unroll the loop 8 times. |
89 for (i = length - residual; i >0; i -= 8) { | 89 for (i = 0; i < length - residual; i += 8) { |
90 int32x4_t in32x4_0 = vld1q_s32(p_start); | 90 int32x4_t in32x4_0 = vld1q_s32(p_start); |
91 p_start += 4; | 91 p_start += 4; |
92 int32x4_t in32x4_1 = vld1q_s32(p_start); | 92 int32x4_t in32x4_1 = vld1q_s32(p_start); |
93 p_start += 4; | 93 p_start += 4; |
94 in32x4_0 = vabsq_s32(in32x4_0); | 94 in32x4_0 = vabsq_s32(in32x4_0); |
95 in32x4_1 = vabsq_s32(in32x4_1); | 95 in32x4_1 = vabsq_s32(in32x4_1); |
96 // vabs doesn't change the value of 0x80000000. | 96 // vabs doesn't change the value of 0x80000000. |
97 // Use u32 so we don't lose the value 0x80000000. | 97 // Use u32 so we don't lose the value 0x80000000. |
98 max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); | 98 max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); |
99 max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); | 99 max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
132 int residual = length & 0x7; | 132 int residual = length & 0x7; |
133 | 133 |
134 if (vector == NULL || length <= 0) { | 134 if (vector == NULL || length <= 0) { |
135 return maximum; | 135 return maximum; |
136 } | 136 } |
137 | 137 |
138 const int16_t* p_start = vector; | 138 const int16_t* p_start = vector; |
139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
140 | 140 |
141 // First part, unroll the loop 8 times. | 141 // First part, unroll the loop 8 times. |
142 for (i = length - residual; i >0; i -= 8) { | 142 for (i = 0; i < length - residual; i += 8) { |
143 int16x8_t in16x8 = vld1q_s16(p_start); | 143 int16x8_t in16x8 = vld1q_s16(p_start); |
144 max16x8 = vmaxq_s16(max16x8, in16x8); | 144 max16x8 = vmaxq_s16(max16x8, in16x8); |
145 p_start += 8; | 145 p_start += 8; |
146 } | 146 } |
147 | 147 |
148 #if defined(WEBRTC_ARCH_ARM64) | 148 #if defined(WEBRTC_ARCH_ARM64) |
149 maximum = vmaxvq_s16(max16x8); | 149 maximum = vmaxvq_s16(max16x8); |
150 #else | 150 #else |
151 int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); | 151 int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); |
152 max16x4 = vpmax_s16(max16x4, max16x4); | 152 max16x4 = vpmax_s16(max16x4, max16x4); |
(...skipping 20 matching lines...) Expand all Loading... |
173 | 173 |
174 if (vector == NULL || length <= 0) { | 174 if (vector == NULL || length <= 0) { |
175 return maximum; | 175 return maximum; |
176 } | 176 } |
177 | 177 |
178 const int32_t* p_start = vector; | 178 const int32_t* p_start = vector; |
179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
181 | 181 |
182 // First part, unroll the loop 8 times. | 182 // First part, unroll the loop 8 times. |
183 for (i = length - residual; i >0; i -= 8) { | 183 for (i = 0; i < length - residual; i += 8) { |
184 int32x4_t in32x4_0 = vld1q_s32(p_start); | 184 int32x4_t in32x4_0 = vld1q_s32(p_start); |
185 p_start += 4; | 185 p_start += 4; |
186 int32x4_t in32x4_1 = vld1q_s32(p_start); | 186 int32x4_t in32x4_1 = vld1q_s32(p_start); |
187 p_start += 4; | 187 p_start += 4; |
188 max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); | 188 max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); |
189 max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); | 189 max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); |
190 } | 190 } |
191 | 191 |
192 int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); | 192 int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); |
193 #if defined(WEBRTC_ARCH_ARM64) | 193 #if defined(WEBRTC_ARCH_ARM64) |
(...skipping 22 matching lines...) Expand all Loading... |
216 int residual = length & 0x7; | 216 int residual = length & 0x7; |
217 | 217 |
218 if (vector == NULL || length <= 0) { | 218 if (vector == NULL || length <= 0) { |
219 return minimum; | 219 return minimum; |
220 } | 220 } |
221 | 221 |
222 const int16_t* p_start = vector; | 222 const int16_t* p_start = vector; |
223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
224 | 224 |
225 // First part, unroll the loop 8 times. | 225 // First part, unroll the loop 8 times. |
226 for (i = length - residual; i >0; i -= 8) { | 226 for (i = 0; i < length - residual; i += 8) { |
227 int16x8_t in16x8 = vld1q_s16(p_start); | 227 int16x8_t in16x8 = vld1q_s16(p_start); |
228 min16x8 = vminq_s16(min16x8, in16x8); | 228 min16x8 = vminq_s16(min16x8, in16x8); |
229 p_start += 8; | 229 p_start += 8; |
230 } | 230 } |
231 | 231 |
232 #if defined(WEBRTC_ARCH_ARM64) | 232 #if defined(WEBRTC_ARCH_ARM64) |
233 minimum = vminvq_s16(min16x8); | 233 minimum = vminvq_s16(min16x8); |
234 #else | 234 #else |
235 int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); | 235 int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); |
236 min16x4 = vpmin_s16(min16x4, min16x4); | 236 min16x4 = vpmin_s16(min16x4, min16x4); |
(...skipping 20 matching lines...) Expand all Loading... |
257 | 257 |
258 if (vector == NULL || length <= 0) { | 258 if (vector == NULL || length <= 0) { |
259 return minimum; | 259 return minimum; |
260 } | 260 } |
261 | 261 |
262 const int32_t* p_start = vector; | 262 const int32_t* p_start = vector; |
263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
265 | 265 |
266 // First part, unroll the loop 8 times. | 266 // First part, unroll the loop 8 times. |
267 for (i = length - residual; i >0; i -= 8) { | 267 for (i = 0; i < length - residual; i += 8) { |
268 int32x4_t in32x4_0 = vld1q_s32(p_start); | 268 int32x4_t in32x4_0 = vld1q_s32(p_start); |
269 p_start += 4; | 269 p_start += 4; |
270 int32x4_t in32x4_1 = vld1q_s32(p_start); | 270 int32x4_t in32x4_1 = vld1q_s32(p_start); |
271 p_start += 4; | 271 p_start += 4; |
272 min32x4_0 = vminq_s32(min32x4_0, in32x4_0); | 272 min32x4_0 = vminq_s32(min32x4_0, in32x4_0); |
273 min32x4_1 = vminq_s32(min32x4_1, in32x4_1); | 273 min32x4_1 = vminq_s32(min32x4_1, in32x4_1); |
274 } | 274 } |
275 | 275 |
276 int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); | 276 int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); |
277 #if defined(WEBRTC_ARCH_ARM64) | 277 #if defined(WEBRTC_ARCH_ARM64) |
278 minimum = vminvq_s32(min32x4); | 278 minimum = vminvq_s32(min32x4); |
279 #else | 279 #else |
280 int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); | 280 int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); |
281 min32x2 = vpmin_s32(min32x2, min32x2); | 281 min32x2 = vpmin_s32(min32x2, min32x2); |
282 | 282 |
283 minimum = vget_lane_s32(min32x2, 0); | 283 minimum = vget_lane_s32(min32x2, 0); |
284 #endif | 284 #endif |
285 | 285 |
286 // Second part, do the remaining iterations (if any). | 286 // Second part, do the remaining iterations (if any). |
287 for (i = residual; i > 0; i--) { | 287 for (i = residual; i > 0; i--) { |
288 if (*p_start < minimum) | 288 if (*p_start < minimum) |
289 minimum = *p_start; | 289 minimum = *p_start; |
290 p_start++; | 290 p_start++; |
291 } | 291 } |
292 return minimum; | 292 return minimum; |
293 } | 293 } |
294 | 294 |
OLD | NEW |