| OLD | NEW |
| 1 // | 1 // |
| 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| 3 // | 3 // |
| 4 // Use of this source code is governed by a BSD-style license | 4 // Use of this source code is governed by a BSD-style license |
| 5 // that can be found in the LICENSE file in the root of the source | 5 // that can be found in the LICENSE file in the root of the source |
| 6 // tree. An additional intellectual property rights grant can be found | 6 // tree. An additional intellectual property rights grant can be found |
| 7 // in the file PATENTS. All contributing project authors may | 7 // in the file PATENTS. All contributing project authors may |
| 8 // be found in the AUTHORS file in the root of the source tree. | 8 // be found in the AUTHORS file in the root of the source tree. |
| 9 // | 9 // |
| 10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s | 10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 87 #define dW1r8b v16.8b | 87 #define dW1r8b v16.8b |
| 88 #define dW1i v17.2s | 88 #define dW1i v17.2s |
| 89 #define dY0r v14.2s | 89 #define dY0r v14.2s |
| 90 #define dY0i v15.2s | 90 #define dY0i v15.2s |
| 91 #define dY1r v16.2s | 91 #define dY1r v16.2s |
| 92 #define dY1i v17.2s | 92 #define dY1i v17.2s |
| 93 #define qT2 v18.2s | 93 #define qT2 v18.2s |
| 94 #define qT3 v20.2s | 94 #define qT3 v20.2s |
| 95 | 95 |
| 96 #define half v0.2s | 96 #define half v0.2s |
| 97 #define halfs v0.s |
| 97 #define dZip v21.2s | 98 #define dZip v21.2s |
| 98 #define dZip8b v21.8b | 99 #define dZip8b v21.8b |
| 99 | 100 |
| 100 // Allocate stack memory required by the function | 101 // Allocate stack memory required by the function |
| 101 | 102 |
| 102 // Write function header | 103 // Write function header |
| 103 M_START ComplexToRealFixup,,d15 | 104 M_START ComplexToRealFixup,,d15 |
| 104 | 105 |
| 105 asr N, N, #1 | 106 asr N, N, #1 |
| 106 | 107 |
| 107 clz order, subFFTNum // N = 2^order | 108 clz order, subFFTNum // N = 2^order |
| 108 | 109 |
| 109 RSB order,order,#63 | 110 rsb order,order,#63 |
| 110 MOV subFFTSize,subFFTNum // subFFTSize = N/2 | 111 MOV subFFTSize,subFFTNum // subFFTSize = N/2 |
| 111 //MOV subFFTNum,N | 112 //MOV subFFTNum,N |
| 112 mov argDst, pDst | 113 mov argDst, pDst |
| 113 mov argTwiddle, pTwiddle | 114 mov argTwiddle, pTwiddle |
| 114 | 115 |
| 115 // F(0) = 1/2{[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]} | 116 // F(0) = 1/2{[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]} |
| 116 // 1/2{[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]} | 117 // 1/2{[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]} |
| 117 // 1/2{[2a+j0] - j [0+j2b]} | 118 // 1/2{[2a+j0] - j [0+j2b]} |
| 118 // (a+b, 0) | 119 // (a+b, 0) |
| 119 | 120 |
| 120 // F(N/2) = 1/2{[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]} | 121 // F(N/2) = 1/2{[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]} |
| 121 // 1/2{[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]} | 122 // 1/2{[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]} |
| 122 // 1/2{[2a+j0] + j [0+j2b]} | 123 // 1/2{[2a+j0] + j [0+j2b]} |
| 123 // (a-b, 0) | 124 // (a-b, 0) |
| 124 | 125 |
| 125 // F(0) and F(N/2) | 126 // F(0) and F(N/2) |
| 126 ld2 {dX0rs,dX0is}[0],[pSrc], #8 | 127 ld2 {dX0rs,dX0is}[0],[pSrc], #8 |
| 127 MOV zero,#0 | 128 MOV zero,#0 |
| 128 mov dX0rs[1],zero | 129 mov dX0rs[1],zero |
| 129 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes | 130 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes |
| 130 mov dX0i[1],zero | 131 mov dX0is[1],zero |
| 131 // twStep = 3N/8 * 8 bytes pointing to W^1 | 132 // twStep = 3N/8 * 8 bytes pointing to W^1 |
| 132 SUB twStep,step,subFFTSize,LSL #1 | 133 SUB twStep,step,subFFTSize,LSL #1 |
| 133 | 134 |
| 134 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0) | 135 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0) |
| 135 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes | 136 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes |
| 136 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0) | 137 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0) |
| 137 SUBS subFFTSize,subFFTSize,#2 | 138 SUBS subFFTSize,subFFTSize,#2 |
| 138 | 139 |
| 139 st1 {dY0r},[argDst],step | 140 st1 {dY0r},[argDst],step |
| 140 ADD pTwiddleTmp,argTwiddle,#8 // W^2 | 141 ADD pTwiddleTmp,argTwiddle,#8 // W^2 |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 178 rev64 dX1i,dX1i | 179 rev64 dX1i,dX1i |
| 179 SUBS subFFTSize,subFFTSize,#4 | 180 SUBS subFFTSize,subFFTSize,#4 |
| 180 | 181 |
| 181 | 182 |
| 182 | 183 |
| 183 fsub dT2,dX0r,dX1r // a-c | 184 fsub dT2,dX0r,dX1r // a-c |
| 184 SUB step1,step1,#8 | 185 SUB step1,step1,#8 |
| 185 fadd dT0,dX0r,dX1r // a+c | 186 fadd dT0,dX0r,dX1r // a+c |
| 186 fsub dT1,dX0i,dX1i // b-d | 187 fsub dT1,dX0i,dX1i // b-d |
| 187 fadd dT3,dX0i,dX1i // b+d | 188 fadd dT3,dX0i,dX1i // b+d |
| 188 fmul dT0,dT0,half[0] | 189 fmul dT0,dT0,halfs[0] |
| 189 fmul dT1,dT1,half[0] | 190 fmul dT1,dT1,halfs[0] |
| 190 // VZIP dW1r,dW1i | 191 // VZIP dW1r,dW1i |
| 191 // VZIP dW0r,dW0i | 192 // VZIP dW0r,dW0i |
| 192 zip1 dZip, dW1r, dW1i | 193 zip1 dZip, dW1r, dW1i |
| 193 zip2 dW1i, dW1r, dW1i | 194 zip2 dW1i, dW1r, dW1i |
| 194 mov dW1r8b, dZip8b | 195 mov dW1r8b, dZip8b |
| 195 zip1 dZip, dW0r, dW0i | 196 zip1 dZip, dW0r, dW0i |
| 196 zip2 dW0i, dW0r, dW0i | 197 zip2 dW0i, dW0r, dW0i |
| 197 mov dW0r8b, dZip8b | 198 mov dW0r8b, dZip8b |
| 198 | 199 |
| 199 fmul qT0,dW1r,dT2 | 200 fmul qT0,dW1r,dT2 |
| 200 fmul qT1,dW1r,dT3 | 201 fmul qT1,dW1r,dT3 |
| 201 fmul qT2,dW0r,dT2 | 202 fmul qT2,dW0r,dT2 |
| 202 fmul qT3,dW0r,dT3 | 203 fmul qT3,dW0r,dT3 |
| 203 | 204 |
| 204 fmla qT0,dW1i,dT3 | 205 fmla qT0,dW1i,dT3 |
| 205 fmls qT1,dW1i,dT2 | 206 fmls qT1,dW1i,dT2 |
| 206 | 207 |
| 207 fmls qT2,dW0i,dT3 | 208 fmls qT2,dW0i,dT3 |
| 208 fmla qT3,dW0i,dT2 | 209 fmla qT3,dW0i,dT2 |
| 209 | 210 |
| 210 | 211 |
| 211 fmul dX1r,qT0,half[0] | 212 fmul dX1r,qT0,halfs[0] |
| 212 fmul dX1i,qT1,half[0] | 213 fmul dX1i,qT1,halfs[0] |
| 213 | 214 |
| 214 fsub dY1r,dT0,dX1i // F(N/2 -1) | 215 fsub dY1r,dT0,dX1i // F(N/2 -1) |
| 215 fadd dY1i,dT1,dX1r | 216 fadd dY1i,dT1,dX1r |
| 216 fneg dY1i,dY1i | 217 fneg dY1i,dY1i |
| 217 | 218 |
| 218 rev64 dY1r,dY1r | 219 rev64 dY1r,dY1r |
| 219 rev64 dY1i,dY1i | 220 rev64 dY1i,dY1i |
| 220 | 221 |
| 221 | 222 |
| 222 fmul dX0r,qT2,half[0] | 223 fmul dX0r,qT2,halfs[0] |
| 223 fmul dX0i,qT3,half[0] | 224 fmul dX0i,qT3,halfs[0] |
| 224 | 225 |
| 225 fsub dY0r,dT0,dX0i // F(1) | 226 fsub dY0r,dT0,dX0i // F(1) |
| 226 fadd dY0i,dT1,dX0r | 227 fadd dY0i,dT1,dX0r |
| 227 | 228 |
| 228 | 229 |
| 229 st2 {dY0r,dY0i},[argDst],step | 230 st2 {dY0r,dY0i},[argDst],step |
| 230 st2 {dY1r,dY1i},[argDst], #16 | 231 st2 {dY1r,dY1i},[argDst], #16 |
| 231 SUB argDst,argDst,step | 232 SUB argDst,argDst,step |
| 232 SUB step,step,#32 // (N/2-4)*8 bytes | 233 SUB step,step,#32 // (N/2-4)*8 bytes |
| 233 | 234 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 252 | 253 |
| 253 st1 {dX0rs}[0],[argDst], #4 | 254 st1 {dX0rs}[0],[argDst], #4 |
| 254 fneg dX0r,dX0r | 255 fneg dX0r,dX0r |
| 255 st1 {dX0rs}[1],[argDst], #4 | 256 st1 {dX0rs}[1],[argDst], #4 |
| 256 End: | 257 End: |
| 257 | 258 |
| 258 // Write function tail | 259 // Write function tail |
| 259 M_END | 260 M_END |
| 260 | 261 |
| 261 .end | 262 .end |
| OLD | NEW |