| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 79 | 79 |
| 80 if (vector == NULL || length <= 0) { | 80 if (vector == NULL || length <= 0) { |
| 81 return -1; | 81 return -1; |
| 82 } | 82 } |
| 83 | 83 |
| 84 const int32_t* p_start = vector; | 84 const int32_t* p_start = vector; |
| 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
| 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
| 87 | 87 |
| 88 // First part, unroll the loop 8 times. | 88 // First part, unroll the loop 8 times. |
| 89 for (i = length - residual; i >0; i -= 8) { | 89 for (i = 0; i < length - residual; i += 8) { |
| 90 int32x4_t in32x4_0 = vld1q_s32(p_start); | 90 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 91 p_start += 4; | 91 p_start += 4; |
| 92 int32x4_t in32x4_1 = vld1q_s32(p_start); | 92 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| 93 p_start += 4; | 93 p_start += 4; |
| 94 in32x4_0 = vabsq_s32(in32x4_0); | 94 in32x4_0 = vabsq_s32(in32x4_0); |
| 95 in32x4_1 = vabsq_s32(in32x4_1); | 95 in32x4_1 = vabsq_s32(in32x4_1); |
| 96 // vabs doesn't change the value of 0x80000000. | 96 // vabs doesn't change the value of 0x80000000. |
| 97 // Use u32 so we don't lose the value 0x80000000. | 97 // Use u32 so we don't lose the value 0x80000000. |
| 98 max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); | 98 max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); |
| 99 max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); | 99 max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 132 int residual = length & 0x7; | 132 int residual = length & 0x7; |
| 133 | 133 |
| 134 if (vector == NULL || length <= 0) { | 134 if (vector == NULL || length <= 0) { |
| 135 return maximum; | 135 return maximum; |
| 136 } | 136 } |
| 137 | 137 |
| 138 const int16_t* p_start = vector; | 138 const int16_t* p_start = vector; |
| 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
| 140 | 140 |
| 141 // First part, unroll the loop 8 times. | 141 // First part, unroll the loop 8 times. |
| 142 for (i = length - residual; i >0; i -= 8) { | 142 for (i = 0; i < length - residual; i += 8) { |
| 143 int16x8_t in16x8 = vld1q_s16(p_start); | 143 int16x8_t in16x8 = vld1q_s16(p_start); |
| 144 max16x8 = vmaxq_s16(max16x8, in16x8); | 144 max16x8 = vmaxq_s16(max16x8, in16x8); |
| 145 p_start += 8; | 145 p_start += 8; |
| 146 } | 146 } |
| 147 | 147 |
| 148 #if defined(WEBRTC_ARCH_ARM64) | 148 #if defined(WEBRTC_ARCH_ARM64) |
| 149 maximum = vmaxvq_s16(max16x8); | 149 maximum = vmaxvq_s16(max16x8); |
| 150 #else | 150 #else |
| 151 int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); | 151 int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); |
| 152 max16x4 = vpmax_s16(max16x4, max16x4); | 152 max16x4 = vpmax_s16(max16x4, max16x4); |
| (...skipping 20 matching lines...) Expand all Loading... |
| 173 | 173 |
| 174 if (vector == NULL || length <= 0) { | 174 if (vector == NULL || length <= 0) { |
| 175 return maximum; | 175 return maximum; |
| 176 } | 176 } |
| 177 | 177 |
| 178 const int32_t* p_start = vector; | 178 const int32_t* p_start = vector; |
| 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 181 | 181 |
| 182 // First part, unroll the loop 8 times. | 182 // First part, unroll the loop 8 times. |
| 183 for (i = length - residual; i >0; i -= 8) { | 183 for (i = 0; i < length - residual; i += 8) { |
| 184 int32x4_t in32x4_0 = vld1q_s32(p_start); | 184 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 185 p_start += 4; | 185 p_start += 4; |
| 186 int32x4_t in32x4_1 = vld1q_s32(p_start); | 186 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| 187 p_start += 4; | 187 p_start += 4; |
| 188 max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); | 188 max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); |
| 189 max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); | 189 max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); |
| 190 } | 190 } |
| 191 | 191 |
| 192 int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); | 192 int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); |
| 193 #if defined(WEBRTC_ARCH_ARM64) | 193 #if defined(WEBRTC_ARCH_ARM64) |
| (...skipping 22 matching lines...) Expand all Loading... |
| 216 int residual = length & 0x7; | 216 int residual = length & 0x7; |
| 217 | 217 |
| 218 if (vector == NULL || length <= 0) { | 218 if (vector == NULL || length <= 0) { |
| 219 return minimum; | 219 return minimum; |
| 220 } | 220 } |
| 221 | 221 |
| 222 const int16_t* p_start = vector; | 222 const int16_t* p_start = vector; |
| 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
| 224 | 224 |
| 225 // First part, unroll the loop 8 times. | 225 // First part, unroll the loop 8 times. |
| 226 for (i = length - residual; i >0; i -= 8) { | 226 for (i = 0; i < length - residual; i += 8) { |
| 227 int16x8_t in16x8 = vld1q_s16(p_start); | 227 int16x8_t in16x8 = vld1q_s16(p_start); |
| 228 min16x8 = vminq_s16(min16x8, in16x8); | 228 min16x8 = vminq_s16(min16x8, in16x8); |
| 229 p_start += 8; | 229 p_start += 8; |
| 230 } | 230 } |
| 231 | 231 |
| 232 #if defined(WEBRTC_ARCH_ARM64) | 232 #if defined(WEBRTC_ARCH_ARM64) |
| 233 minimum = vminvq_s16(min16x8); | 233 minimum = vminvq_s16(min16x8); |
| 234 #else | 234 #else |
| 235 int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); | 235 int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); |
| 236 min16x4 = vpmin_s16(min16x4, min16x4); | 236 min16x4 = vpmin_s16(min16x4, min16x4); |
| (...skipping 20 matching lines...) Expand all Loading... |
| 257 | 257 |
| 258 if (vector == NULL || length <= 0) { | 258 if (vector == NULL || length <= 0) { |
| 259 return minimum; | 259 return minimum; |
| 260 } | 260 } |
| 261 | 261 |
| 262 const int32_t* p_start = vector; | 262 const int32_t* p_start = vector; |
| 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 265 | 265 |
| 266 // First part, unroll the loop 8 times. | 266 // First part, unroll the loop 8 times. |
| 267 for (i = length - residual; i >0; i -= 8) { | 267 for (i = 0; i < length - residual; i += 8) { |
| 268 int32x4_t in32x4_0 = vld1q_s32(p_start); | 268 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 269 p_start += 4; | 269 p_start += 4; |
| 270 int32x4_t in32x4_1 = vld1q_s32(p_start); | 270 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| 271 p_start += 4; | 271 p_start += 4; |
| 272 min32x4_0 = vminq_s32(min32x4_0, in32x4_0); | 272 min32x4_0 = vminq_s32(min32x4_0, in32x4_0); |
| 273 min32x4_1 = vminq_s32(min32x4_1, in32x4_1); | 273 min32x4_1 = vminq_s32(min32x4_1, in32x4_1); |
| 274 } | 274 } |
| 275 | 275 |
| 276 int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); | 276 int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); |
| 277 #if defined(WEBRTC_ARCH_ARM64) | 277 #if defined(WEBRTC_ARCH_ARM64) |
| 278 minimum = vminvq_s32(min32x4); | 278 minimum = vminvq_s32(min32x4); |
| 279 #else | 279 #else |
| 280 int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); | 280 int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); |
| 281 min32x2 = vpmin_s32(min32x2, min32x2); | 281 min32x2 = vpmin_s32(min32x2, min32x2); |
| 282 | 282 |
| 283 minimum = vget_lane_s32(min32x2, 0); | 283 minimum = vget_lane_s32(min32x2, 0); |
| 284 #endif | 284 #endif |
| 285 | 285 |
| 286 // Second part, do the remaining iterations (if any). | 286 // Second part, do the remaining iterations (if any). |
| 287 for (i = residual; i > 0; i--) { | 287 for (i = residual; i > 0; i--) { |
| 288 if (*p_start < minimum) | 288 if (*p_start < minimum) |
| 289 minimum = *p_start; | 289 minimum = *p_start; |
| 290 p_start++; | 290 p_start++; |
| 291 } | 291 } |
| 292 return minimum; | 292 return minimum; |
| 293 } | 293 } |
| 294 | 294 |
| OLD | NEW |