OLD | NEW |
1 // | 1 // |
2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
3 // | 3 // |
4 // Use of this source code is governed by a BSD-style license | 4 // Use of this source code is governed by a BSD-style license |
5 // that can be found in the LICENSE file in the root of the source | 5 // that can be found in the LICENSE file in the root of the source |
6 // tree. An additional intellectual property rights grant can be found | 6 // tree. An additional intellectual property rights grant can be found |
7 // in the file PATENTS. All contributing project authors may | 7 // in the file PATENTS. All contributing project authors may |
8 // be found in the AUTHORS file in the root of the source tree. | 8 // be found in the AUTHORS file in the root of the source tree. |
9 // | 9 // |
10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s | 10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
87 #define dW1r8b v16.8b | 87 #define dW1r8b v16.8b |
88 #define dW1i v17.2s | 88 #define dW1i v17.2s |
89 #define dY0r v14.2s | 89 #define dY0r v14.2s |
90 #define dY0i v15.2s | 90 #define dY0i v15.2s |
91 #define dY1r v16.2s | 91 #define dY1r v16.2s |
92 #define dY1i v17.2s | 92 #define dY1i v17.2s |
93 #define qT2 v18.2s | 93 #define qT2 v18.2s |
94 #define qT3 v20.2s | 94 #define qT3 v20.2s |
95 | 95 |
96 #define half v0.2s | 96 #define half v0.2s |
| 97 #define halfs v0.s |
97 #define dZip v21.2s | 98 #define dZip v21.2s |
98 #define dZip8b v21.8b | 99 #define dZip8b v21.8b |
99 | 100 |
100 // Allocate stack memory required by the function | 101 // Allocate stack memory required by the function |
101 | 102 |
102 // Write function header | 103 // Write function header |
103 M_START ComplexToRealFixup,,d15 | 104 M_START ComplexToRealFixup,,d15 |
104 | 105 |
105 asr N, N, #1 | 106 asr N, N, #1 |
106 | 107 |
107 clz order, subFFTNum // N = 2^order | 108 clz order, subFFTNum // N = 2^order |
108 | 109 |
109 RSB order,order,#63 | 110 rsb order,order,#63 |
110 MOV subFFTSize,subFFTNum // subFFTSize = N/2 | 111 MOV subFFTSize,subFFTNum // subFFTSize = N/2 |
111 //MOV subFFTNum,N | 112 //MOV subFFTNum,N |
112 mov argDst, pDst | 113 mov argDst, pDst |
113 mov argTwiddle, pTwiddle | 114 mov argTwiddle, pTwiddle |
114 | 115 |
115 // F(0) = 1/2{[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]} | 116 // F(0) = 1/2{[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]} |
116 // = 1/2{[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]} | 117 // = 1/2{[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]} |
117 // = 1/2{[2a+j0] - j [0+j2b]} = 1/2{2a + 2b} | 118 // = 1/2{[2a+j0] - j [0+j2b]} = 1/2{2a + 2b} |
118 // = (a+b, 0) | 119 // = (a+b, 0) |
119 | 120 |
120 // F(N/2) = 1/2{[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]} | 121 // F(N/2) = 1/2{[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]} |
121 // = 1/2{[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]} | 122 // = 1/2{[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]} |
122 // = 1/2{[2a+j0] + j [0+j2b]} = 1/2{2a - 2b} | 123 // = 1/2{[2a+j0] + j [0+j2b]} = 1/2{2a - 2b} |
123 // = (a-b, 0) | 124 // = (a-b, 0) |
124 | 125 |
125 // F(0) and F(N/2) | 126 // F(0) and F(N/2) |
126 ld2 {dX0rs,dX0is}[0],[pSrc], #8 | 127 ld2 {dX0rs,dX0is}[0],[pSrc], #8 |
127 MOV zero,#0 | 128 MOV zero,#0 |
128 mov dX0rs[1],zero | 129 mov dX0rs[1],zero |
129 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes | 130 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes |
130 mov dX0i[1],zero | 131 mov dX0is[1],zero |
131 // twStep = 3N/8 * 8 bytes pointing to W^1 | 132 // twStep = 3N/8 * 8 bytes pointing to W^1 |
132 SUB twStep,step,subFFTSize,LSL #1 | 133 SUB twStep,step,subFFTSize,LSL #1 |
133 | 134 |
134 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0) | 135 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0) |
135 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes | 136 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes |
136 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0) | 137 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0) |
137 SUBS subFFTSize,subFFTSize,#2 | 138 SUBS subFFTSize,subFFTSize,#2 |
138 | 139 |
139 st1 {dY0r},[argDst],step | 140 st1 {dY0r},[argDst],step |
140 ADD pTwiddleTmp,argTwiddle,#8 // W^2 | 141 ADD pTwiddleTmp,argTwiddle,#8 // W^2 |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
178 rev64 dX1i,dX1i | 179 rev64 dX1i,dX1i |
179 SUBS subFFTSize,subFFTSize,#4 | 180 SUBS subFFTSize,subFFTSize,#4 |
180 | 181 |
181 | 182 |
182 | 183 |
183 fsub dT2,dX0r,dX1r // a-c | 184 fsub dT2,dX0r,dX1r // a-c |
184 SUB step1,step1,#8 | 185 SUB step1,step1,#8 |
185 fadd dT0,dX0r,dX1r // a+c | 186 fadd dT0,dX0r,dX1r // a+c |
186 fsub dT1,dX0i,dX1i // b-d | 187 fsub dT1,dX0i,dX1i // b-d |
187 fadd dT3,dX0i,dX1i // b+d | 188 fadd dT3,dX0i,dX1i // b+d |
188 fmul dT0,dT0,half[0] | 189 fmul dT0,dT0,halfs[0] |
189 fmul dT1,dT1,half[0] | 190 fmul dT1,dT1,halfs[0] |
190 // VZIP dW1r,dW1i | 191 // VZIP dW1r,dW1i |
191 // VZIP dW0r,dW0i | 192 // VZIP dW0r,dW0i |
192 zip1 dZip, dW1r, dW1i | 193 zip1 dZip, dW1r, dW1i |
193 zip2 dW1i, dW1r, dW1i | 194 zip2 dW1i, dW1r, dW1i |
194 mov dW1r8b, dZip8b | 195 mov dW1r8b, dZip8b |
195 zip1 dZip, dW0r, dW0i | 196 zip1 dZip, dW0r, dW0i |
196 zip2 dW0i, dW0r, dW0i | 197 zip2 dW0i, dW0r, dW0i |
197 mov dW0r8b, dZip8b | 198 mov dW0r8b, dZip8b |
198 | 199 |
199 fmul qT0,dW1r,dT2 | 200 fmul qT0,dW1r,dT2 |
200 fmul qT1,dW1r,dT3 | 201 fmul qT1,dW1r,dT3 |
201 fmul qT2,dW0r,dT2 | 202 fmul qT2,dW0r,dT2 |
202 fmul qT3,dW0r,dT3 | 203 fmul qT3,dW0r,dT3 |
203 | 204 |
204 fmla qT0,dW1i,dT3 | 205 fmla qT0,dW1i,dT3 |
205 fmls qT1,dW1i,dT2 | 206 fmls qT1,dW1i,dT2 |
206 | 207 |
207 fmls qT2,dW0i,dT3 | 208 fmls qT2,dW0i,dT3 |
208 fmla qT3,dW0i,dT2 | 209 fmla qT3,dW0i,dT2 |
209 | 210 |
210 | 211 |
211 fmul dX1r,qT0,half[0] | 212 fmul dX1r,qT0,halfs[0] |
212 fmul dX1i,qT1,half[0] | 213 fmul dX1i,qT1,halfs[0] |
213 | 214 |
214 fsub dY1r,dT0,dX1i // F(N/2 -1) | 215 fsub dY1r,dT0,dX1i // F(N/2 -1) |
215 fadd dY1i,dT1,dX1r | 216 fadd dY1i,dT1,dX1r |
216 fneg dY1i,dY1i | 217 fneg dY1i,dY1i |
217 | 218 |
218 rev64 dY1r,dY1r | 219 rev64 dY1r,dY1r |
219 rev64 dY1i,dY1i | 220 rev64 dY1i,dY1i |
220 | 221 |
221 | 222 |
222 fmul dX0r,qT2,half[0] | 223 fmul dX0r,qT2,halfs[0] |
223 fmul dX0i,qT3,half[0] | 224 fmul dX0i,qT3,halfs[0] |
224 | 225 |
225 fsub dY0r,dT0,dX0i // F(1) | 226 fsub dY0r,dT0,dX0i // F(1) |
226 fadd dY0i,dT1,dX0r | 227 fadd dY0i,dT1,dX0r |
227 | 228 |
228 | 229 |
229 st2 {dY0r,dY0i},[argDst],step | 230 st2 {dY0r,dY0i},[argDst],step |
230 st2 {dY1r,dY1i},[argDst], #16 | 231 st2 {dY1r,dY1i},[argDst], #16 |
231 SUB argDst,argDst,step | 232 SUB argDst,argDst,step |
232 SUB step,step,#32 // (N/2-4)*8 bytes | 233 SUB step,step,#32 // (N/2-4)*8 bytes |
233 | 234 |
(...skipping 18 matching lines...) Expand all Loading... |
252 | 253 |
253 st1 {dX0rs}[0],[argDst], #4 | 254 st1 {dX0rs}[0],[argDst], #4 |
254 fneg dX0r,dX0r | 255 fneg dX0r,dX0r |
255 st1 {dX0rs}[1],[argDst], #4 | 256 st1 {dX0rs}[1],[argDst], #4 |
256 End: | 257 End: |
257 | 258 |
258 // Write function tail | 259 // Write function tail |
259 M_END | 260 M_END |
260 | 261 |
261 .end | 262 .end |
OLD | NEW |