OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 #include <assert.h> | |
13 #include <stdlib.h> | 12 #include <stdlib.h> |
14 | 13 |
| 14 #include "webrtc/base/checks.h" |
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
16 | 16 |
17 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. | 17 // Maximum absolute value of word16 vector. NEON intrinsics version for ARM 32-bit/64-bit platforms. |
18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { | 18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { |
19 int absolute = 0, maximum = 0; | 19 int absolute = 0, maximum = 0; |
20 | 20 |
21 assert(length > 0); | 21 RTC_DCHECK_GT(length, 0); |
22 | 22 |
23 const int16_t* p_start = vector; | 23 const int16_t* p_start = vector; |
24 size_t rest = length & 7; | 24 size_t rest = length & 7; |
25 const int16_t* p_end = vector + length - rest; | 25 const int16_t* p_end = vector + length - rest; |
26 | 26 |
27 int16x8_t v; | 27 int16x8_t v; |
28 uint16x8_t max_qv; | 28 uint16x8_t max_qv; |
29 max_qv = vdupq_n_u16(0); | 29 max_qv = vdupq_n_u16(0); |
30 | 30 |
31 while (p_start < p_end) { | 31 while (p_start < p_end) { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
69 // Maximum absolute value of word32 vector. NEON intrinsics version for | 69 // Maximum absolute value of word32 vector. NEON intrinsics version for |
70 // ARM 32-bit/64-bit platforms. | 70 // ARM 32-bit/64-bit platforms. |
71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { | 71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { |
72 // Use uint32_t for the local variables, to accommodate the return value | 72 // Use uint32_t for the local variables, to accommodate the return value |
73 // of abs(0x80000000), which is 0x80000000. | 73 // of abs(0x80000000), which is 0x80000000. |
74 | 74 |
75 uint32_t absolute = 0, maximum = 0; | 75 uint32_t absolute = 0, maximum = 0; |
76 size_t i = 0; | 76 size_t i = 0; |
77 size_t residual = length & 0x7; | 77 size_t residual = length & 0x7; |
78 | 78 |
79 assert(length > 0); | 79 RTC_DCHECK_GT(length, 0); |
80 | 80 |
81 const int32_t* p_start = vector; | 81 const int32_t* p_start = vector; |
82 uint32x4_t max32x4_0 = vdupq_n_u32(0); | 82 uint32x4_t max32x4_0 = vdupq_n_u32(0); |
83 uint32x4_t max32x4_1 = vdupq_n_u32(0); | 83 uint32x4_t max32x4_1 = vdupq_n_u32(0); |
84 | 84 |
85 // First part, unroll the loop 8 times. | 85 // First part, unroll the loop 8 times. |
86 for (i = 0; i < length - residual; i += 8) { | 86 for (i = 0; i < length - residual; i += 8) { |
87 int32x4_t in32x4_0 = vld1q_s32(p_start); | 87 int32x4_t in32x4_0 = vld1q_s32(p_start); |
88 p_start += 4; | 88 p_start += 4; |
89 int32x4_t in32x4_1 = vld1q_s32(p_start); | 89 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
121 return (int32_t)maximum; | 121 return (int32_t)maximum; |
122 } | 122 } |
123 | 123 |
124 // Maximum value of word16 vector. NEON intrinsics version for | 124 // Maximum value of word16 vector. NEON intrinsics version for |
125 // ARM 32-bit/64-bit platforms. | 125 // ARM 32-bit/64-bit platforms. |
126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { | 126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { |
127 int16_t maximum = WEBRTC_SPL_WORD16_MIN; | 127 int16_t maximum = WEBRTC_SPL_WORD16_MIN; |
128 size_t i = 0; | 128 size_t i = 0; |
129 size_t residual = length & 0x7; | 129 size_t residual = length & 0x7; |
130 | 130 |
131 assert(length > 0); | 131 RTC_DCHECK_GT(length, 0); |
132 | 132 |
133 const int16_t* p_start = vector; | 133 const int16_t* p_start = vector; |
134 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); | 134 int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); |
135 | 135 |
136 // First part, unroll the loop 8 times. | 136 // First part, unroll the loop 8 times. |
137 for (i = 0; i < length - residual; i += 8) { | 137 for (i = 0; i < length - residual; i += 8) { |
138 int16x8_t in16x8 = vld1q_s16(p_start); | 138 int16x8_t in16x8 = vld1q_s16(p_start); |
139 max16x8 = vmaxq_s16(max16x8, in16x8); | 139 max16x8 = vmaxq_s16(max16x8, in16x8); |
140 p_start += 8; | 140 p_start += 8; |
141 } | 141 } |
(...skipping 17 matching lines...) Expand all Loading... |
159 return maximum; | 159 return maximum; |
160 } | 160 } |
161 | 161 |
162 // Maximum value of word32 vector. NEON intrinsics version for | 162 // Maximum value of word32 vector. NEON intrinsics version for |
163 // ARM 32-bit/64-bit platforms. | 163 // ARM 32-bit/64-bit platforms. |
164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { | 164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { |
165 int32_t maximum = WEBRTC_SPL_WORD32_MIN; | 165 int32_t maximum = WEBRTC_SPL_WORD32_MIN; |
166 size_t i = 0; | 166 size_t i = 0; |
167 size_t residual = length & 0x7; | 167 size_t residual = length & 0x7; |
168 | 168 |
169 assert(length > 0); | 169 RTC_DCHECK_GT(length, 0); |
170 | 170 |
171 const int32_t* p_start = vector; | 171 const int32_t* p_start = vector; |
172 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 172 int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
173 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); | 173 int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); |
174 | 174 |
175 // First part, unroll the loop 8 times. | 175 // First part, unroll the loop 8 times. |
176 for (i = 0; i < length - residual; i += 8) { | 176 for (i = 0; i < length - residual; i += 8) { |
177 int32x4_t in32x4_0 = vld1q_s32(p_start); | 177 int32x4_t in32x4_0 = vld1q_s32(p_start); |
178 p_start += 4; | 178 p_start += 4; |
179 int32x4_t in32x4_1 = vld1q_s32(p_start); | 179 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 21 matching lines...) Expand all Loading... |
201 return maximum; | 201 return maximum; |
202 } | 202 } |
203 | 203 |
204 // Minimum value of word16 vector. NEON intrinsics version for | 204 // Minimum value of word16 vector. NEON intrinsics version for |
205 // ARM 32-bit/64-bit platforms. | 205 // ARM 32-bit/64-bit platforms. |
206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { | 206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { |
207 int16_t minimum = WEBRTC_SPL_WORD16_MAX; | 207 int16_t minimum = WEBRTC_SPL_WORD16_MAX; |
208 size_t i = 0; | 208 size_t i = 0; |
209 size_t residual = length & 0x7; | 209 size_t residual = length & 0x7; |
210 | 210 |
211 assert(length > 0); | 211 RTC_DCHECK_GT(length, 0); |
212 | 212 |
213 const int16_t* p_start = vector; | 213 const int16_t* p_start = vector; |
214 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); | 214 int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); |
215 | 215 |
216 // First part, unroll the loop 8 times. | 216 // First part, unroll the loop 8 times. |
217 for (i = 0; i < length - residual; i += 8) { | 217 for (i = 0; i < length - residual; i += 8) { |
218 int16x8_t in16x8 = vld1q_s16(p_start); | 218 int16x8_t in16x8 = vld1q_s16(p_start); |
219 min16x8 = vminq_s16(min16x8, in16x8); | 219 min16x8 = vminq_s16(min16x8, in16x8); |
220 p_start += 8; | 220 p_start += 8; |
221 } | 221 } |
(...skipping 17 matching lines...) Expand all Loading... |
239 return minimum; | 239 return minimum; |
240 } | 240 } |
241 | 241 |
242 // Minimum value of word32 vector. NEON intrinsics version for | 242 // Minimum value of word32 vector. NEON intrinsics version for |
243 // ARM 32-bit/64-bit platforms. | 243 // ARM 32-bit/64-bit platforms. |
244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { | 244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { |
245 int32_t minimum = WEBRTC_SPL_WORD32_MAX; | 245 int32_t minimum = WEBRTC_SPL_WORD32_MAX; |
246 size_t i = 0; | 246 size_t i = 0; |
247 size_t residual = length & 0x7; | 247 size_t residual = length & 0x7; |
248 | 248 |
249 assert(length > 0); | 249 RTC_DCHECK_GT(length, 0); |
250 | 250 |
251 const int32_t* p_start = vector; | 251 const int32_t* p_start = vector; |
252 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 252 int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
253 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); | 253 int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); |
254 | 254 |
255 // First part, unroll the loop 8 times. | 255 // First part, unroll the loop 8 times. |
256 for (i = 0; i < length - residual; i += 8) { | 256 for (i = 0; i < length - residual; i += 8) { |
257 int32x4_t in32x4_0 = vld1q_s32(p_start); | 257 int32x4_t in32x4_0 = vld1q_s32(p_start); |
258 p_start += 4; | 258 p_start += 4; |
259 int32x4_t in32x4_1 = vld1q_s32(p_start); | 259 int32x4_t in32x4_1 = vld1q_s32(p_start); |
(...skipping 14 matching lines...) Expand all Loading... |
274 | 274 |
275 // Second part, do the remaining iterations (if any). | 275 // Second part, do the remaining iterations (if any). |
276 for (i = residual; i > 0; i--) { | 276 for (i = residual; i > 0; i--) { |
277 if (*p_start < minimum) | 277 if (*p_start < minimum) |
278 minimum = *p_start; | 278 minimum = *p_start; |
279 p_start++; | 279 p_start++; |
280 } | 280 } |
281 return minimum; | 281 return minimum; |
282 } | 282 } |
283 | 283 |
OLD | NEW |