OLD | NEW |
1 // | 1 // |
2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
3 // | 3 // |
4 // Use of this source code is governed by a BSD-style license | 4 // Use of this source code is governed by a BSD-style license |
5 // that can be found in the LICENSE file in the root of the source | 5 // that can be found in the LICENSE file in the root of the source |
6 // tree. An additional intellectual property rights grant can be found | 6 // tree. An additional intellectual property rights grant can be found |
7 // in the file PATENTS. All contributing project authors may | 7 // in the file PATENTS. All contributing project authors may |
8 // be found in the AUTHORS file in the root of the source tree. | 8 // be found in the AUTHORS file in the root of the source tree. |
9 // | 9 // |
10 // | 10 // |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 #define stepTwiddle x10 | 73 #define stepTwiddle x10 |
74 #define setCount x11 | 74 #define setCount x11 |
75 #define srcStep x12 | 75 #define srcStep x12 |
76 #define setStep x13 | 76 #define setStep x13 |
77 #define dstStep x14 | 77 #define dstStep x14 |
78 #define twStep x15 | 78 #define twStep x15 |
79 | 79 |
80 // Neon Registers | 80 // Neon Registers |
81 | 81 |
// W1..W3 multiplier registers (v0..v2); presumably the twiddle factors - confirm
// against the load code outside this excerpt.
// The plain names carry the .2s vector arrangement. The "s"-suffixed names
// (present only on the NEW side) carry the bare .s element size so that a lane
// index can be appended, e.g. dW1s[0] -> v0.s[0], which is how the by-element
// fmul/fmla/fmls operands in the set loop reference them.
82 #define dW1 v0.2s | 82 #define dW1 v0.2s |
| 83 #define dW1s v0.s |
83 #define dW2 v1.2s | 84 #define dW2 v1.2s |
| 85 #define dW2s v1.s |
84 #define dW3 v2.2s | 86 #define dW3 v2.2s |
| 87 #define dW3s v2.s |
85 | 88 |
86 #define dXr0 v4.2s | 89 #define dXr0 v4.2s |
87 #define dXi0 v5.2s | 90 #define dXi0 v5.2s |
88 #define dXr1 v6.2s | 91 #define dXr1 v6.2s |
89 #define dXi1 v7.2s | 92 #define dXi1 v7.2s |
90 #define dXr2 v8.2s | 93 #define dXr2 v8.2s |
91 #define dXi2 v9.2s | 94 #define dXi2 v9.2s |
92 #define dXr3 v10.2s | 95 #define dXr3 v10.2s |
93 #define dXi3 v11.2s | 96 #define dXi3 v11.2s |
94 #define dYr0 v12.2s | 97 #define dYr0 v12.2s |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 ADD pSrc,pSrc,pointStep | 178 ADD pSrc,pSrc,pointStep |
176 | 179 |
177 | 180 |
178 // Loop on the sets | 181 // Loop on the sets |
179 | 182 |
180 radix4SetLoop\name : | 183 radix4SetLoop\name : |
181 | 184 |
182 | 185 |
183 | 186 |
// Multiply inputs X1..X3 by W1..W3, selected at assembly time by \inverse.
// Both branches start with the same products against lane 0 of W:
//     Zr = Xr * W[0]        Zi = Xi * W[0]
// and then accumulate the lane 1 products with opposite signs:
//     inverse ("TRUE"):  Zr += Xi * W[1]    Zi -= Xr * W[1]
//     otherwise:         Zr -= Xi * W[1]    Zi += Xr * W[1]
// If lane 0 holds the real part and lane 1 the imaginary part of W (not
// shown in this excerpt - confirm), these are X * conj(W) and X * W.
// Each ld2 that refills dXrN/dXiN for the next iteration is placed after the
// last instruction that reads the current dXrN/dXiN, so the loads are
// interleaved with the arithmetic of the following pair.
// The only OLD/NEW difference in this region is the lane operand spelling:
// dWn[i] (expands to vN.2s[i]) becomes dWns[i] (expands to vN.s[i]).
184 .ifeqs "\inverse", "TRUE" | 187 .ifeqs "\inverse", "TRUE" |
185 fmul dZr1,dXr1,dW1[0] | 188 fmul dZr1,dXr1,dW1s[0] |
186 fmul dZi1,dXi1,dW1[0] | 189 fmul dZi1,dXi1,dW1s[0] |
187 fmul dZr2,dXr2,dW2[0] | 190 fmul dZr2,dXr2,dW2s[0] |
188 fmul dZi2,dXi2,dW2[0] | 191 fmul dZi2,dXi2,dW2s[0] |
189 fmul dZr3,dXr3,dW3[0] | 192 fmul dZr3,dXr3,dW3s[0] |
190 fmul dZi3,dXi3,dW3[0] | 193 fmul dZi3,dXi3,dW3s[0] |
191 | 194 |
192 fmla dZr1,dXi1,dW1[1] // real part | 195 fmla dZr1,dXi1,dW1s[1] // real part |
193 fmls dZi1,dXr1,dW1[1] // imag part | 196 fmls dZi1,dXr1,dW1s[1] // imag part |
194 | 197 |
195 // data[1] for next iteration | 198 // data[1] for next iteration |
196 ld2 {dXr1,dXi1},[pSrc],pointStep | 199 ld2 {dXr1,dXi1},[pSrc],pointStep |
197 | 200 |
198 fmla dZr2,dXi2,dW2[1] // real part | 201 fmla dZr2,dXi2,dW2s[1] // real part |
199 fmls dZi2,dXr2,dW2[1] // imag part | 202 fmls dZi2,dXr2,dW2s[1] // imag part |
200 | 203 |
201 // data[2] for next iteration | 204 // data[2] for next iteration |
202 ld2 {dXr2,dXi2},[pSrc],pointStep | 205 ld2 {dXr2,dXi2},[pSrc],pointStep |
203 | 206 |
204 fmla dZr3,dXi3,dW3[1] // real part | 207 fmla dZr3,dXi3,dW3s[1] // real part |
205 fmls dZi3,dXr3,dW3[1] // imag part | 208 fmls dZi3,dXr3,dW3s[1] // imag part |
// Non-inverse branch: same lane 0 products, lane 1 terms with swapped signs.
206 .else | 209 .else |
207 fmul dZr1,dXr1,dW1[0] | 210 fmul dZr1,dXr1,dW1s[0] |
208 fmul dZi1,dXi1,dW1[0] | 211 fmul dZi1,dXi1,dW1s[0] |
209 fmul dZr2,dXr2,dW2[0] | 212 fmul dZr2,dXr2,dW2s[0] |
210 fmul dZi2,dXi2,dW2[0] | 213 fmul dZi2,dXi2,dW2s[0] |
211 fmul dZr3,dXr3,dW3[0] | 214 fmul dZr3,dXr3,dW3s[0] |
212 fmul dZi3,dXi3,dW3[0] | 215 fmul dZi3,dXi3,dW3s[0] |
213 | 216 |
214 fmls dZr1,dXi1,dW1[1] // real part | 217 fmls dZr1,dXi1,dW1s[1] // real part |
215 fmla dZi1,dXr1,dW1[1] // imag part | 218 fmla dZi1,dXr1,dW1s[1] // imag part |
216 | 219 |
217 // data[1] for next iteration | 220 // data[1] for next iteration |
218 ld2 {dXr1,dXi1},[pSrc],pointStep | 221 ld2 {dXr1,dXi1},[pSrc],pointStep |
219 | 222 |
220 fmls dZr2,dXi2,dW2[1] // real part | 223 fmls dZr2,dXi2,dW2s[1] // real part |
221 fmla dZi2,dXr2,dW2[1] // imag part | 224 fmla dZi2,dXr2,dW2s[1] // imag part |
222 | 225 |
223 // data[2] for next iteration | 226 // data[2] for next iteration |
224 ld2 {dXr2,dXi2},[pSrc],pointStep | 227 ld2 {dXr2,dXi2},[pSrc],pointStep |
225 | 228 |
226 fmls dZr3,dXi3,dW3[1] // real part | 229 fmls dZr3,dXi3,dW3s[1] // real part |
227 fmla dZi3,dXr3,dW3[1] // imag part | 230 fmla dZi3,dXr3,dW3s[1] // imag part |
228 .endif | 231 .endif |
229 | 232 |
230 // data[3] & update pSrc to data[0] | 233 // data[3] & update pSrc to data[0] |
231 // But don't read on the very last iteration because that reads past | 234 // But don't read on the very last iteration because that reads past |
232 // the end of pSrc. The last iteration is grpCount = 4, setCount = 2. | 235 // the end of pSrc. The last iteration is grpCount = 4, setCount = 2. |
233 cmp grpCount, #4 | 236 cmp grpCount, #4 |
234 | 237 |
235 b.ne skipUpdate\name | 238 b.ne skipUpdate\name |
236 cmp setCount, #2 | 239 cmp setCount, #2 |
237 b.ne skipUpdate\name | 240 b.ne skipUpdate\name |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 FFTSTAGE "FALSE","FALSE",FWD | 333 FFTSTAGE "FALSE","FALSE",FWD |
331 M_END | 334 M_END |
332 | 335 |
333 | 336 |
// Entry point armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace: its body is a single
// expansion of the FFTSTAGE macro, wrapped in M_START / M_END.
// The "TRUE" argument is presumably the \inverse parameter that the set loop
// tests with .ifeqs "\inverse", "TRUE", and INV presumably the \name suffix
// appended to labels such as radix4SetLoop\name - the macro header is outside
// this excerpt, so confirm the parameter order there.
// NOTE(review): the meaning of the first "FALSE" argument and of the d15
// argument to M_START is also defined outside this excerpt - confirm.
334 M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace,,d15 | 337 M_START armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace,,d15 |
335 FFTSTAGE "FALSE","TRUE",INV | 338 FFTSTAGE "FALSE","TRUE",INV |
336 M_END | 339 M_END |
337 | 340 |
338 | 341 |
339 .end | 342 .end |
OLD | NEW |