| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 #include <stdlib.h> | 12 #include <stdlib.h> |
| 13 | 13 |
| 14 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 14 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 15 | 15 |
| 16 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. | 16 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. |
| 17 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length) { | 17 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { |
| 18 int absolute = 0, maximum = 0; | 18 int absolute = 0, maximum = 0; |
| 19 | 19 |
| 20 if (vector == NULL || length <= 0) { | 20 if (vector == NULL || length == 0) { |
| 21 return -1; | 21 return -1; |
| 22 } | 22 } |
| 23 | 23 |
| 24 const int16_t* p_start = vector; | 24 const int16_t* p_start = vector; |
| 25 int rest = length & 7; | 25 size_t rest = length & 7; |
| 26 const int16_t* p_end = vector + length - rest; | 26 const int16_t* p_end = vector + length - rest; |
| 27 | 27 |
| 28 int16x8_t v; | 28 int16x8_t v; |
| 29 uint16x8_t max_qv; | 29 uint16x8_t max_qv; |
| 30 max_qv = vdupq_n_u16(0); | 30 max_qv = vdupq_n_u16(0); |
| 31 | 31 |
| 32 while (p_start < p_end) { | 32 while (p_start < p_end) { |
| 33 v = vld1q_s16(p_start); | 33 v = vld1q_s16(p_start); |
| 34 // Note vabs doesn't change the value of -32768. | 34 // Note vabs doesn't change the value of -32768. |
| 35 v = vabsq_s16(v); | 35 v = vabsq_s16(v); |
| (...skipping 26 matching lines...) Expand all Loading... |
| 62 // Guard the case for abs(-32768). | 62 // Guard the case for abs(-32768). |
| 63 if (maximum > WEBRTC_SPL_WORD16_MAX) { | 63 if (maximum > WEBRTC_SPL_WORD16_MAX) { |
| 64 maximum = WEBRTC_SPL_WORD16_MAX; | 64 maximum = WEBRTC_SPL_WORD16_MAX; |
| 65 } | 65 } |
| 66 | 66 |
| 67 return (int16_t)maximum; | 67 return (int16_t)maximum; |
| 68 } | 68 } |
| 69 | 69 |
| 70 // Maximum absolute value of word32 vector. NEON intrinsics version for | 70 // Maximum absolute value of word32 vector. NEON intrinsics version for |
| 71 // ARM 32-bit/64-bit platforms. | 71 // ARM 32-bit/64-bit platforms. |
| 72 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length) { | 72 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { |
| 73 // Use uint32_t for the local variables, to accommodate the return value | 73 // Use uint32_t for the local variables, to accommodate the return value |
| 74 // of abs(0x80000000), which is 0x80000000. | 74 // of abs(0x80000000), which is 0x80000000. |
| 75 | 75 |
| 76 uint32_t absolute = 0, maximum = 0; | 76 uint32_t absolute = 0, maximum = 0; |
| 77 int i = 0; | 77 size_t i = 0; |
| 78 int residual = length & 0x7; | 78 size_t residual = length & 0x7; |
| 79 | 79 |
| 80 if (vector == NULL || length <= 0) { | 80 if (vector == NULL || length == 0) { |
| 81 return -1; | 81 return -1; |
| 82 } | 82 } |
| 83 | 83 |
| 84 const int32_t* p_start = vector; | 84 const int32_t* p_start = vector; |
| 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 85 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
| 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 86 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
| 87 | 87 |
| 88 // First part, unroll the loop 8 times. | 88 // First part, unroll the loop 8 times. |
| 89 for (i = 0; i < length - residual; i += 8) { | 89 for (i = 0; i < length - residual; i += 8) { |
| 90 int32x4_t in32x4_0 = vld1q_s32(p_start); | 90 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| (...skipping 28 matching lines...) Expand all Loading... |
| 119 } | 119 } |
| 120 | 120 |
| 121 // Guard against the case for 0x80000000. | 121 // Guard against the case for 0x80000000. |
| 122 maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); | 122 maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); |
| 123 | 123 |
| 124 return (int32_t)maximum; | 124 return (int32_t)maximum; |
| 125 } | 125 } |
| 126 | 126 |
| 127 // Maximum value of word16 vector. NEON intrinsics version for | 127 // Maximum value of word16 vector. NEON intrinsics version for |
| 128 // ARM 32-bit/64-bit platforms. | 128 // ARM 32-bit/64-bit platforms. |
| 129 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length) { | 129 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { |
| 130 int16_t maximum = WEBRTC_SPL_WORD16_MIN; | 130 int16_t maximum = WEBRTC_SPL_WORD16_MIN; |
| 131 int i = 0; | 131 size_t i = 0; |
| 132 int residual = length & 0x7; | 132 size_t residual = length & 0x7; |
| 133 | 133 |
| 134 if (vector == NULL || length <= 0) { | 134 if (vector == NULL || length == 0) { |
| 135 return maximum; | 135 return maximum; |
| 136 } | 136 } |
| 137 | 137 |
| 138 const int16_t* p_start = vector; | 138 const int16_t* p_start = vector; |
| 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 139 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
| 140 | 140 |
| 141 // First part, unroll the loop 8 times. | 141 // First part, unroll the loop 8 times. |
| 142 for (i = 0; i < length - residual; i += 8) { | 142 for (i = 0; i < length - residual; i += 8) { |
| 143 int16x8_t in16x8 = vld1q_s16(p_start); | 143 int16x8_t in16x8 = vld1q_s16(p_start); |
| 144 max16x8 = vmaxq_s16(max16x8, in16x8); | 144 max16x8 = vmaxq_s16(max16x8, in16x8); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 159 for (i = residual; i > 0; i--) { | 159 for (i = residual; i > 0; i--) { |
| 160 if (*p_start > maximum) | 160 if (*p_start > maximum) |
| 161 maximum = *p_start; | 161 maximum = *p_start; |
| 162 p_start++; | 162 p_start++; |
| 163 } | 163 } |
| 164 return maximum; | 164 return maximum; |
| 165 } | 165 } |
| 166 | 166 |
| 167 // Maximum value of word32 vector. NEON intrinsics version for | 167 // Maximum value of word32 vector. NEON intrinsics version for |
| 168 // ARM 32-bit/64-bit platforms. | 168 // ARM 32-bit/64-bit platforms. |
| 169 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length) { | 169 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { |
| 170 int32_t maximum = WEBRTC_SPL_WORD32_MIN; | 170 int32_t maximum = WEBRTC_SPL_WORD32_MIN; |
| 171 int i = 0; | 171 size_t i = 0; |
| 172 int residual = length & 0x7; | 172 size_t residual = length & 0x7; |
| 173 | 173 |
| 174 if (vector == NULL || length <= 0) { | 174 if (vector == NULL || length == 0) { |
| 175 return maximum; | 175 return maximum; |
| 176 } | 176 } |
| 177 | 177 |
| 178 const int32_t* p_start = vector; | 178 const int32_t* p_start = vector; |
| 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 179 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 180 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 181 | 181 |
| 182 // First part, unroll the loop 8 times. | 182 // First part, unroll the loop 8 times. |
| 183 for (i = 0; i < length - residual; i += 8) { | 183 for (i = 0; i < length - residual; i += 8) { |
| 184 int32x4_t in32x4_0 = vld1q_s32(p_start); | 184 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| (...skipping 18 matching lines...) Expand all Loading... |
| 203 for (i = residual; i > 0; i--) { | 203 for (i = residual; i > 0; i--) { |
| 204 if (*p_start > maximum) | 204 if (*p_start > maximum) |
| 205 maximum = *p_start; | 205 maximum = *p_start; |
| 206 p_start++; | 206 p_start++; |
| 207 } | 207 } |
| 208 return maximum; | 208 return maximum; |
| 209 } | 209 } |
| 210 | 210 |
| 211 // Minimum value of word16 vector. NEON intrinsics version for | 211 // Minimum value of word16 vector. NEON intrinsics version for |
| 212 // ARM 32-bit/64-bit platforms. | 212 // ARM 32-bit/64-bit platforms. |
| 213 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length) { | 213 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { |
| 214 int16_t minimum = WEBRTC_SPL_WORD16_MAX; | 214 int16_t minimum = WEBRTC_SPL_WORD16_MAX; |
| 215 int i = 0; | 215 size_t i = 0; |
| 216 int residual = length & 0x7; | 216 size_t residual = length & 0x7; |
| 217 | 217 |
| 218 if (vector == NULL || length <= 0) { | 218 if (vector == NULL || length == 0) { |
| 219 return minimum; | 219 return minimum; |
| 220 } | 220 } |
| 221 | 221 |
| 222 const int16_t* p_start = vector; | 222 const int16_t* p_start = vector; |
| 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 223 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
| 224 | 224 |
| 225 // First part, unroll the loop 8 times. | 225 // First part, unroll the loop 8 times. |
| 226 for (i = 0; i < length - residual; i += 8) { | 226 for (i = 0; i < length - residual; i += 8) { |
| 227 int16x8_t in16x8 = vld1q_s16(p_start); | 227 int16x8_t in16x8 = vld1q_s16(p_start); |
| 228 min16x8 = vminq_s16(min16x8, in16x8); | 228 min16x8 = vminq_s16(min16x8, in16x8); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 243 for (i = residual; i > 0; i--) { | 243 for (i = residual; i > 0; i--) { |
| 244 if (*p_start < minimum) | 244 if (*p_start < minimum) |
| 245 minimum = *p_start; | 245 minimum = *p_start; |
| 246 p_start++; | 246 p_start++; |
| 247 } | 247 } |
| 248 return minimum; | 248 return minimum; |
| 249 } | 249 } |
| 250 | 250 |
| 251 // Minimum value of word32 vector. NEON intrinsics version for | 251 // Minimum value of word32 vector. NEON intrinsics version for |
| 252 // ARM 32-bit/64-bit platforms. | 252 // ARM 32-bit/64-bit platforms. |
| 253 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length) { | 253 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { |
| 254 int32_t minimum = WEBRTC_SPL_WORD32_MAX; | 254 int32_t minimum = WEBRTC_SPL_WORD32_MAX; |
| 255 int i = 0; | 255 size_t i = 0; |
| 256 int residual = length & 0x7; | 256 size_t residual = length & 0x7; |
| 257 | 257 |
| 258 if (vector == NULL || length <= 0) { | 258 if (vector == NULL || length == 0) { |
| 259 return minimum; | 259 return minimum; |
| 260 } | 260 } |
| 261 | 261 |
| 262 const int32_t* p_start = vector; | 262 const int32_t* p_start = vector; |
| 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 263 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 264 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 265 | 265 |
| 266 // First part, unroll the loop 8 times. | 266 // First part, unroll the loop 8 times. |
| 267 for (i = 0; i < length - residual; i += 8) { | 267 for (i = 0; i < length - residual; i += 8) { |
| 268 int32x4_t in32x4_0 = vld1q_s32(p_start); | 268 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 285 | 285 |
| 286 // Second part, do the remaining iterations (if any). | 286 // Second part, do the remaining iterations (if any). |
| 287 for (i = residual; i > 0; i--) { | 287 for (i = residual; i > 0; i--) { |
| 288 if (*p_start < minimum) | 288 if (*p_start < minimum) |
| 289 minimum = *p_start; | 289 minimum = *p_start; |
| 290 p_start++; | 290 p_start++; |
| 291 } | 291 } |
| 292 return minimum; | 292 return minimum; |
| 293 } | 293 } |
| 294 | 294 |
| OLD | NEW |