dl/sp/src/arm/arm64/ComplexToRealFixup.S - Issue 1420973006: arm64: clang assembler compatibility

Side by Side Diff: dl/sp/src/arm/arm64/ComplexToRealFixup.S

Issue 1420973006: arm64: clang assembler compatibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc/deps/third_party/openmax@master

Patch Set: arm64: clang assembler compatibility Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //	1 //

2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.	2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

3 //	3 //

4 // Use of this source code is governed by a BSD-style license	4 // Use of this source code is governed by a BSD-style license

5 // that can be found in the LICENSE file in the root of the source	5 // that can be found in the LICENSE file in the root of the source

6 // tree. An additional intellectual property rights grant can be found	6 // tree. An additional intellectual property rights grant can be found

7 // in the file PATENTS. All contributing project authors may	7 // in the file PATENTS. All contributing project authors may

8 // be found in the AUTHORS file in the root of the source tree.	8 // be found in the AUTHORS file in the root of the source tree.

9 //	9 //

10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s	10 // This is a modification of omxSP_FFTFwd_RToCCS_S32_Sfs_s.s

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
87 #define dW1r8b v16.8b	87 #define dW1r8b v16.8b

88 #define dW1i v17.2s	88 #define dW1i v17.2s

89 #define dY0r v14.2s	89 #define dY0r v14.2s

90 #define dY0i v15.2s	90 #define dY0i v15.2s

91 #define dY1r v16.2s	91 #define dY1r v16.2s

92 #define dY1i v17.2s	92 #define dY1i v17.2s

93 #define qT2 v18.2s	93 #define qT2 v18.2s

94 #define qT3 v20.2s	94 #define qT3 v20.2s

95	95

96 #define half v0.2s	96 #define half v0.2s

	97 #define halfs v0.s

97 #define dZip v21.2s	98 #define dZip v21.2s

98 #define dZip8b v21.8b	99 #define dZip8b v21.8b

99	100

100 // Allocate stack memory required by the function	101 // Allocate stack memory required by the function

101	102

102 // Write function header	103 // Write function header

103 M_START ComplexToRealFixup,,d15	104 M_START ComplexToRealFixup,,d15

104	105

105 asr N, N, #1	106 asr N, N, #1

106	107

107 clz order, subFFTNum // N = 2^order	108 clz order, subFFTNum // N = 2^order

108	109

109 RSB order,order,#63	110 rsb order,order,#63

110 MOV subFFTSize,subFFTNum // subFFTSize = N/2	111 MOV subFFTSize,subFFTNum // subFFTSize = N/2

111 //MOV subFFTNum,N	112 //MOV subFFTNum,N

112 mov argDst, pDst	113 mov argDst, pDst

113 mov argTwiddle, pTwiddle	114 mov argTwiddle, pTwiddle

114	115

115 // F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]	116 // F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]

116 // 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]	117 // 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]

117 // 1/2[2a+j0] - j [0+j2b]	118 // 1/2[2a+j0] - j [0+j2b]

118 // (a+b, 0)	119 // (a+b, 0)

119	120

120 // F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]	121 // F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]

121 // 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]	122 // 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]

122 // 1/2[2a+j0] + j [0+j2b]	123 // 1/2[2a+j0] + j [0+j2b]

123 // (a-b, 0)	124 // (a-b, 0)

124	125

125 // F(0) and F(N/2)	126 // F(0) and F(N/2)

126 ld2 {dX0rs,dX0is}[0],[pSrc], #8	127 ld2 {dX0rs,dX0is}[0],[pSrc], #8

127 MOV zero,#0	128 MOV zero,#0

128 mov dX0rs[1],zero	129 mov dX0rs[1],zero

129 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes	130 lsl step,subFFTSize, #3 // step = N/2 * 8 bytes

130 mov dX0i[1],zero	131 mov dX0is[1],zero

131 // twStep = 3N/8 * 8 bytes pointing to W^1	132 // twStep = 3N/8 * 8 bytes pointing to W^1

132 SUB twStep,step,subFFTSize,LSL #1	133 SUB twStep,step,subFFTSize,LSL #1

133	134

134 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0)	135 fadd dY0r,dX0r,dX0i // F(0) = ((Z0.r+Z0.i) , 0)

135 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes	136 lsl step1,subFFTSize, #2 // step1 = N/2 * 4 bytes

136 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0)	137 fsub dY0i,dX0r,dX0i // F(N/2) = ((Z0.r-Z0.i) , 0)

137 SUBS subFFTSize,subFFTSize,#2	138 SUBS subFFTSize,subFFTSize,#2

138	139

139 st1 {dY0r},[argDst],step	140 st1 {dY0r},[argDst],step

140 ADD pTwiddleTmp,argTwiddle,#8 // W^2	141 ADD pTwiddleTmp,argTwiddle,#8 // W^2

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
178 rev64 dX1i,dX1i	179 rev64 dX1i,dX1i

179 SUBS subFFTSize,subFFTSize,#4	180 SUBS subFFTSize,subFFTSize,#4

180	181

181	182

182	183

183 fsub dT2,dX0r,dX1r // a-c	184 fsub dT2,dX0r,dX1r // a-c

184 SUB step1,step1,#8	185 SUB step1,step1,#8

185 fadd dT0,dX0r,dX1r // a+c	186 fadd dT0,dX0r,dX1r // a+c

186 fsub dT1,dX0i,dX1i // b-d	187 fsub dT1,dX0i,dX1i // b-d

187 fadd dT3,dX0i,dX1i // b+d	188 fadd dT3,dX0i,dX1i // b+d

188 fmul dT0,dT0,half[0]	189 fmul dT0,dT0,halfs[0]

189 fmul dT1,dT1,half[0]	190 fmul dT1,dT1,halfs[0]

190 // VZIP dW1r,dW1i	191 // VZIP dW1r,dW1i

191 // VZIP dW0r,dW0i	192 // VZIP dW0r,dW0i

192 zip1 dZip, dW1r, dW1i	193 zip1 dZip, dW1r, dW1i

193 zip2 dW1i, dW1r, dW1i	194 zip2 dW1i, dW1r, dW1i

194 mov dW1r8b, dZip8b	195 mov dW1r8b, dZip8b

195 zip1 dZip, dW0r, dW0i	196 zip1 dZip, dW0r, dW0i

196 zip2 dW0i, dW0r, dW0i	197 zip2 dW0i, dW0r, dW0i

197 mov dW0r8b, dZip8b	198 mov dW0r8b, dZip8b

198	199

199 fmul qT0,dW1r,dT2	200 fmul qT0,dW1r,dT2

200 fmul qT1,dW1r,dT3	201 fmul qT1,dW1r,dT3

201 fmul qT2,dW0r,dT2	202 fmul qT2,dW0r,dT2

202 fmul qT3,dW0r,dT3	203 fmul qT3,dW0r,dT3

203	204

204 fmla qT0,dW1i,dT3	205 fmla qT0,dW1i,dT3

205 fmls qT1,dW1i,dT2	206 fmls qT1,dW1i,dT2

206	207

207 fmls qT2,dW0i,dT3	208 fmls qT2,dW0i,dT3

208 fmla qT3,dW0i,dT2	209 fmla qT3,dW0i,dT2

209	210

210	211

211 fmul dX1r,qT0,half[0]	212 fmul dX1r,qT0,halfs[0]

212 fmul dX1i,qT1,half[0]	213 fmul dX1i,qT1,halfs[0]

213	214

214 fsub dY1r,dT0,dX1i // F(N/2 -1)	215 fsub dY1r,dT0,dX1i // F(N/2 -1)

215 fadd dY1i,dT1,dX1r	216 fadd dY1i,dT1,dX1r

216 fneg dY1i,dY1i	217 fneg dY1i,dY1i

217	218

218 rev64 dY1r,dY1r	219 rev64 dY1r,dY1r

219 rev64 dY1i,dY1i	220 rev64 dY1i,dY1i

220	221

221	222

222 fmul dX0r,qT2,half[0]	223 fmul dX0r,qT2,halfs[0]

223 fmul dX0i,qT3,half[0]	224 fmul dX0i,qT3,halfs[0]

224	225

225 fsub dY0r,dT0,dX0i // F(1)	226 fsub dY0r,dT0,dX0i // F(1)

226 fadd dY0i,dT1,dX0r	227 fadd dY0i,dT1,dX0r

227	228

228	229

229 st2 {dY0r,dY0i},[argDst],step	230 st2 {dY0r,dY0i},[argDst],step

230 st2 {dY1r,dY1i},[argDst], #16	231 st2 {dY1r,dY1i},[argDst], #16

231 SUB argDst,argDst,step	232 SUB argDst,argDst,step

232 SUB step,step,#32 // (N/2-4)*8 bytes	233 SUB step,step,#32 // (N/2-4)*8 bytes

233	234

(...skipping 18 matching lines...) Expand all Loading...
252	253

253 st1 {dX0rs}[0],[argDst], #4	254 st1 {dX0rs}[0],[argDst], #4

254 fneg dX0r,dX0r	255 fneg dX0r,dX0r

255 st1 {dX0rs}[1],[argDst], #4	256 st1 {dX0rs}[1],[argDst], #4

256 End:	257 End:

257	258

258 // Write function tail	259 // Write function tail

259 M_END	260 M_END

260	261

261 .end	262 .end

OLD	NEW

« no previous file with comments | « dl/sp/api/armSP.h ('k') | dl/sp/src/arm/arm64/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_s.S » ('j') | no next file with comments »