Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(489)

Side by Side Diff: dl/sp/src/arm/arm64/ComplexToRealFixup.S

Issue 1420973006: arm64: clang assembler compatibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc/deps/third_party/openmax@master
Patch Set: arm64: clang assembler compatibility Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « dl/sp/api/armSP.h ('k') | dl/sp/src/arm/arm64/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_s.S » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // 1 //
2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 // 3 //
4 // Use of this source code is governed by a BSD-style license 4 // Use of this source code is governed by a BSD-style license
5 // that can be found in the LICENSE file in the root of the source 5 // that can be found in the LICENSE file in the root of the source
6 // tree. An additional intellectual property rights grant can be found 6 // tree. An additional intellectual property rights grant can be found
7 // in the file PATENTS. All contributing project authors may 7 // in the file PATENTS. All contributing project authors may
8 // be found in the AUTHORS file in the root of the source tree. 8 // be found in the AUTHORS file in the root of the source tree.
9 // 9 //
10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s 10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
87 #define dW1r8b v16.8b 87 #define dW1r8b v16.8b
88 #define dW1i v17.2s 88 #define dW1i v17.2s
89 #define dY0r v14.2s 89 #define dY0r v14.2s
90 #define dY0i v15.2s 90 #define dY0i v15.2s
91 #define dY1r v16.2s 91 #define dY1r v16.2s
92 #define dY1i v17.2s 92 #define dY1i v17.2s
93 #define qT2 v18.2s 93 #define qT2 v18.2s
94 #define qT3 v20.2s 94 #define qT3 v20.2s
95 95
96 #define half v0.2s 96 #define half v0.2s
97 #define halfs v0.s
97 #define dZip v21.2s 98 #define dZip v21.2s
98 #define dZip8b v21.8b 99 #define dZip8b v21.8b
99 100
100 // Allocate stack memory required by the function 101 // Allocate stack memory required by the function
101 102
102 // Write function header 103 // Write function header
103 M_START ComplexToRealFixup,,d15 104 M_START ComplexToRealFixup,,d15
104 105
105 asr N, N, #1 106 asr N, N, #1
106 107
107 clz order, subFFTNum // N = 2^order 108 clz order, subFFTNum // N = 2^order
108 109
109 RSB order,order,#63 110 rsb order,order,#63
110 MOV subFFTSize,subFFTNum // subFFTSize = N/2 111 MOV subFFTSize,subFFTNum // subFFTSize = N/2
111 //MOV subFFTNum,N 112 //MOV subFFTNum,N
112 mov argDst, pDst 113 mov argDst, pDst
113 mov argTwiddle, pTwiddle 114 mov argTwiddle, pTwiddle
114 115
115 // F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)] 116 // F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
116 // 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)] 117 // 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
117 // 1/2[2a+j0] - j [0+j2b] 118 // 1/2[2a+j0] - j [0+j2b]
118 // (a+b, 0) 119 // (a+b, 0)
119 120
120 // F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)] 121 // F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
121 // 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)] 122 // 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
122 // 1/2[2a+j0] + j [0+j2b] 123 // 1/2[2a+j0] + j [0+j2b]
123 // (a-b, 0) 124 // (a-b, 0)
124 125
125 // F(0) and F(N/2) 126 // F(0) and F(N/2)
126 ld2 {dX0rs,dX0is}[0],[pSrc], #8 127 ld2 {dX0rs,dX0is}[0],[pSrc], #8
127 MOV zero,#0 128 MOV zero,#0
128 mov dX0rs[1],zero 129 mov dX0rs[1],zero
129 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes 130 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes
130 mov dX0i[1],zero 131 mov dX0is[1],zero
131 // twStep = 3N/8 * 8 bytes pointing to W^1 132 // twStep = 3N/8 * 8 bytes pointing to W^1
132 SUB twStep,step,subFFTSize,LSL #1 133 SUB twStep,step,subFFTSize,LSL #1
133 134
134 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0) 135 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0)
135 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes 136 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes
136 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0) 137 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0)
137 SUBS subFFTSize,subFFTSize,#2 138 SUBS subFFTSize,subFFTSize,#2
138 139
139 st1 {dY0r},[argDst],step 140 st1 {dY0r},[argDst],step
140 ADD pTwiddleTmp,argTwiddle,#8 // W^2 141 ADD pTwiddleTmp,argTwiddle,#8 // W^2
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 rev64 dX1i,dX1i 179 rev64 dX1i,dX1i
179 SUBS subFFTSize,subFFTSize,#4 180 SUBS subFFTSize,subFFTSize,#4
180 181
181 182
182 183
183 fsub dT2,dX0r,dX1r // a-c 184 fsub dT2,dX0r,dX1r // a-c
184 SUB step1,step1,#8 185 SUB step1,step1,#8
185 fadd dT0,dX0r,dX1r // a+c 186 fadd dT0,dX0r,dX1r // a+c
186 fsub dT1,dX0i,dX1i // b-d 187 fsub dT1,dX0i,dX1i // b-d
187 fadd dT3,dX0i,dX1i // b+d 188 fadd dT3,dX0i,dX1i // b+d
188 fmul dT0,dT0,half[0] 189 fmul dT0,dT0,halfs[0]
189 fmul dT1,dT1,half[0] 190 fmul dT1,dT1,halfs[0]
190 // VZIP dW1r,dW1i 191 // VZIP dW1r,dW1i
191 // VZIP dW0r,dW0i 192 // VZIP dW0r,dW0i
192 zip1 dZip, dW1r, dW1i 193 zip1 dZip, dW1r, dW1i
193 zip2 dW1i, dW1r, dW1i 194 zip2 dW1i, dW1r, dW1i
194 mov dW1r8b, dZip8b 195 mov dW1r8b, dZip8b
195 zip1 dZip, dW0r, dW0i 196 zip1 dZip, dW0r, dW0i
196 zip2 dW0i, dW0r, dW0i 197 zip2 dW0i, dW0r, dW0i
197 mov dW0r8b, dZip8b 198 mov dW0r8b, dZip8b
198 199
199 fmul qT0,dW1r,dT2 200 fmul qT0,dW1r,dT2
200 fmul qT1,dW1r,dT3 201 fmul qT1,dW1r,dT3
201 fmul qT2,dW0r,dT2 202 fmul qT2,dW0r,dT2
202 fmul qT3,dW0r,dT3 203 fmul qT3,dW0r,dT3
203 204
204 fmla qT0,dW1i,dT3 205 fmla qT0,dW1i,dT3
205 fmls qT1,dW1i,dT2 206 fmls qT1,dW1i,dT2
206 207
207 fmls qT2,dW0i,dT3 208 fmls qT2,dW0i,dT3
208 fmla qT3,dW0i,dT2 209 fmla qT3,dW0i,dT2
209 210
210 211
211 fmul dX1r,qT0,half[0] 212 fmul dX1r,qT0,halfs[0]
212 fmul dX1i,qT1,half[0] 213 fmul dX1i,qT1,halfs[0]
213 214
214 fsub dY1r,dT0,dX1i // F(N/2 -1) 215 fsub dY1r,dT0,dX1i // F(N/2 -1)
215 fadd dY1i,dT1,dX1r 216 fadd dY1i,dT1,dX1r
216 fneg dY1i,dY1i 217 fneg dY1i,dY1i
217 218
218 rev64 dY1r,dY1r 219 rev64 dY1r,dY1r
219 rev64 dY1i,dY1i 220 rev64 dY1i,dY1i
220 221
221 222
222 fmul dX0r,qT2,half[0] 223 fmul dX0r,qT2,halfs[0]
223 fmul dX0i,qT3,half[0] 224 fmul dX0i,qT3,halfs[0]
224 225
225 fsub dY0r,dT0,dX0i // F(1) 226 fsub dY0r,dT0,dX0i // F(1)
226 fadd dY0i,dT1,dX0r 227 fadd dY0i,dT1,dX0r
227 228
228 229
229 st2 {dY0r,dY0i},[argDst],step 230 st2 {dY0r,dY0i},[argDst],step
230 st2 {dY1r,dY1i},[argDst], #16 231 st2 {dY1r,dY1i},[argDst], #16
231 SUB argDst,argDst,step 232 SUB argDst,argDst,step
232 SUB step,step,#32 // (N/2-4)*8 bytes 233 SUB step,step,#32 // (N/2-4)*8 bytes
233 234
(...skipping 18 matching lines...) Expand all
252 253
253 st1 {dX0rs}[0],[argDst], #4 254 st1 {dX0rs}[0],[argDst], #4
254 fneg dX0r,dX0r 255 fneg dX0r,dX0r
255 st1 {dX0rs}[1],[argDst], #4 256 st1 {dX0rs}[1],[argDst], #4
256 End: 257 End:
257 258
258 // Write function tail 259 // Write function tail
259 M_END 260 M_END
260 261
261 .end 262 .end
OLDNEW
« no previous file with comments | « dl/sp/api/armSP.h ('k') | dl/sp/src/arm/arm64/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_s.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698