| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 #include <assert.h> | |
| 13 #include <stdlib.h> | 12 #include <stdlib.h> |
| 14 | 13 |
| | 14 #include "webrtc/base/checks.h" |
| 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 16 | 16 |
| 17 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. | 17 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. |
| 18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { | 18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { |
| 19 int absolute = 0, maximum = 0; | 19 int absolute = 0, maximum = 0; |
| 20 | 20 |
| 21 assert(length > 0); | 21 RTC_DCHECK_GT(length, 0); |
| 22 | 22 |
| 23 const int16_t* p_start = vector; | 23 const int16_t* p_start = vector; |
| 24 size_t rest = length & 7; | 24 size_t rest = length & 7; |
| 25 const int16_t* p_end = vector + length - rest; | 25 const int16_t* p_end = vector + length - rest; |
| 26 | 26 |
| 27 int16x8_t v; | 27 int16x8_t v; |
| 28 uint16x8_t max_qv; | 28 uint16x8_t max_qv; |
| 29 max_qv = vdupq_n_u16(0); | 29 max_qv = vdupq_n_u16(0); |
| 30 | 30 |
| 31 while (p_start < p_end) { | 31 while (p_start < p_end) { |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 69 // Maximum absolute value of word32 vector. NEON intrinsics version for | 69 // Maximum absolute value of word32 vector. NEON intrinsics version for |
| 70 // ARM 32-bit/64-bit platforms. | 70 // ARM 32-bit/64-bit platforms. |
| 71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { | 71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { |
| 72 // Use uint32_t for the local variables, to accommodate the return value | 72 // Use uint32_t for the local variables, to accommodate the return value |
| 73 // of abs(0x80000000), which is 0x80000000. | 73 // of abs(0x80000000), which is 0x80000000. |
| 74 | 74 |
| 75 uint32_t absolute = 0, maximum = 0; | 75 uint32_t absolute = 0, maximum = 0; |
| 76 size_t i = 0; | 76 size_t i = 0; |
| 77 size_t residual = length & 0x7; | 77 size_t residual = length & 0x7; |
| 78 | 78 |
| 79 assert(length > 0); | 79 RTC_DCHECK_GT(length, 0); |
| 80 | 80 |
| 81 const int32_t* p_start = vector; | 81 const int32_t* p_start = vector; |
| 82 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 82 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
| 83 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 83 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
| 84 | 84 |
| 85 // First part, unroll the loop 8 times. | 85 // First part, unroll the loop 8 times. |
| 86 for (i = 0; i < length - residual; i += 8) { | 86 for (i = 0; i < length - residual; i += 8) { |
| 87 int32x4_t in32x4_0 = vld1q_s32(p_start); | 87 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 88 p_start += 4; | 88 p_start += 4; |
| 89 int32x4_t in32x4_1 = vld1q_s32(p_start); | 89 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 121 return (int32_t)maximum; | 121 return (int32_t)maximum; |
| 122 } | 122 } |
| 123 | 123 |
| 124 // Maximum value of word16 vector. NEON intrinsics version for | 124 // Maximum value of word16 vector. NEON intrinsics version for |
| 125 // ARM 32-bit/64-bit platforms. | 125 // ARM 32-bit/64-bit platforms. |
| 126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { | 126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { |
| 127 int16_t maximum = WEBRTC_SPL_WORD16_MIN; | 127 int16_t maximum = WEBRTC_SPL_WORD16_MIN; |
| 128 size_t i = 0; | 128 size_t i = 0; |
| 129 size_t residual = length & 0x7; | 129 size_t residual = length & 0x7; |
| 130 | 130 |
| 131 assert(length > 0); | 131 RTC_DCHECK_GT(length, 0); |
| 132 | 132 |
| 133 const int16_t* p_start = vector; | 133 const int16_t* p_start = vector; |
| 134 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 134 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
| 135 | 135 |
| 136 // First part, unroll the loop 8 times. | 136 // First part, unroll the loop 8 times. |
| 137 for (i = 0; i < length - residual; i += 8) { | 137 for (i = 0; i < length - residual; i += 8) { |
| 138 int16x8_t in16x8 = vld1q_s16(p_start); | 138 int16x8_t in16x8 = vld1q_s16(p_start); |
| 139 max16x8 = vmaxq_s16(max16x8, in16x8); | 139 max16x8 = vmaxq_s16(max16x8, in16x8); |
| 140 p_start += 8; | 140 p_start += 8; |
| 141 } | 141 } |
| (...skipping 17 matching lines...) Expand all Loading... |
| 159 return maximum; | 159 return maximum; |
| 160 } | 160 } |
| 161 | 161 |
| 162 // Maximum value of word32 vector. NEON intrinsics version for | 162 // Maximum value of word32 vector. NEON intrinsics version for |
| 163 // ARM 32-bit/64-bit platforms. | 163 // ARM 32-bit/64-bit platforms. |
| 164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { | 164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { |
| 165 int32_t maximum = WEBRTC_SPL_WORD32_MIN; | 165 int32_t maximum = WEBRTC_SPL_WORD32_MIN; |
| 166 size_t i = 0; | 166 size_t i = 0; |
| 167 size_t residual = length & 0x7; | 167 size_t residual = length & 0x7; |
| 168 | 168 |
| 169 assert(length > 0); | 169 RTC_DCHECK_GT(length, 0); |
| 170 | 170 |
| 171 const int32_t* p_start = vector; | 171 const int32_t* p_start = vector; |
| 172 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 172 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 173 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 173 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
| 174 | 174 |
| 175 // First part, unroll the loop 8 times. | 175 // First part, unroll the loop 8 times. |
| 176 for (i = 0; i < length - residual; i += 8) { | 176 for (i = 0; i < length - residual; i += 8) { |
| 177 int32x4_t in32x4_0 = vld1q_s32(p_start); | 177 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 178 p_start += 4; | 178 p_start += 4; |
| 179 int32x4_t in32x4_1 = vld1q_s32(p_start); | 179 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 201 return maximum; | 201 return maximum; |
| 202 } | 202 } |
| 203 | 203 |
| 204 // Minimum value of word16 vector. NEON intrinsics version for | 204 // Minimum value of word16 vector. NEON intrinsics version for |
| 205 // ARM 32-bit/64-bit platforms. | 205 // ARM 32-bit/64-bit platforms. |
| 206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { | 206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { |
| 207 int16_t minimum = WEBRTC_SPL_WORD16_MAX; | 207 int16_t minimum = WEBRTC_SPL_WORD16_MAX; |
| 208 size_t i = 0; | 208 size_t i = 0; |
| 209 size_t residual = length & 0x7; | 209 size_t residual = length & 0x7; |
| 210 | 210 |
| 211 assert(length > 0); | 211 RTC_DCHECK_GT(length, 0); |
| 212 | 212 |
| 213 const int16_t* p_start = vector; | 213 const int16_t* p_start = vector; |
| 214 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 214 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
| 215 | 215 |
| 216 // First part, unroll the loop 8 times. | 216 // First part, unroll the loop 8 times. |
| 217 for (i = 0; i < length - residual; i += 8) { | 217 for (i = 0; i < length - residual; i += 8) { |
| 218 int16x8_t in16x8 = vld1q_s16(p_start); | 218 int16x8_t in16x8 = vld1q_s16(p_start); |
| 219 min16x8 = vminq_s16(min16x8, in16x8); | 219 min16x8 = vminq_s16(min16x8, in16x8); |
| 220 p_start += 8; | 220 p_start += 8; |
| 221 } | 221 } |
| (...skipping 17 matching lines...) Expand all Loading... |
| 239 return minimum; | 239 return minimum; |
| 240 } | 240 } |
| 241 | 241 |
| 242 // Minimum value of word32 vector. NEON intrinsics version for | 242 // Minimum value of word32 vector. NEON intrinsics version for |
| 243 // ARM 32-bit/64-bit platforms. | 243 // ARM 32-bit/64-bit platforms. |
| 244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { | 244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { |
| 245 int32_t minimum = WEBRTC_SPL_WORD32_MAX; | 245 int32_t minimum = WEBRTC_SPL_WORD32_MAX; |
| 246 size_t i = 0; | 246 size_t i = 0; |
| 247 size_t residual = length & 0x7; | 247 size_t residual = length & 0x7; |
| 248 | 248 |
| 249 assert(length > 0); | 249 RTC_DCHECK_GT(length, 0); |
| 250 | 250 |
| 251 const int32_t* p_start = vector; | 251 const int32_t* p_start = vector; |
| 252 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 252 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 253 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 253 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
| 254 | 254 |
| 255 // First part, unroll the loop 8 times. | 255 // First part, unroll the loop 8 times. |
| 256 for (i = 0; i < length - residual; i += 8) { | 256 for (i = 0; i < length - residual; i += 8) { |
| 257 int32x4_t in32x4_0 = vld1q_s32(p_start); | 257 int32x4_t in32x4_0 = vld1q_s32(p_start); |
| 258 p_start += 4; | 258 p_start += 4; |
| 259 int32x4_t in32x4_1 = vld1q_s32(p_start); | 259 int32x4_t in32x4_1 = vld1q_s32(p_start); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 274 | 274 |
| 275 // Second part, do the remaining iterations (if any). | 275 // Second part, do the remaining iterations (if any). |
| 276 for (i = residual; i > 0; i--) { | 276 for (i = residual; i > 0; i--) { |
| 277 if (*p_start < minimum) | 277 if (*p_start < minimum) |
| 278 minimum = *p_start; | 278 minimum = *p_start; |
| 279 p_start++; | 279 p_start++; |
| 280 } | 280 } |
| 281 return minimum; | 281 return minimum; |
| 282 } | 282 } |
| 283 | 283 |
| OLD | NEW |