Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Side by Side Diff: dl/sp/src/arm/arm64/armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_s.S

Issue 1420973006: arm64: clang assembler compatibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc/deps/third_party/openmax@master
Patch Set: arm64: clang assembler compatibility Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // 1 //
2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 // 3 //
4 // Use of this source code is governed by a BSD-style license 4 // Use of this source code is governed by a BSD-style license
5 // that can be found in the LICENSE file in the root of the source 5 // that can be found in the LICENSE file in the root of the source
6 // tree. An additional intellectual property rights grant can be found 6 // tree. An additional intellectual property rights grant can be found
7 // in the file PATENTS. All contributing project authors may 7 // in the file PATENTS. All contributing project authors may
8 // be found in the AUTHORS file in the root of the source tree. 8 // be found in the AUTHORS file in the root of the source tree.
9 // 9 //
10 // This is a modification of 10 // This is a modification of
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
105 #define dY2 v4.2s 105 #define dY2 v4.2s
106 #define dY3 v5.2s 106 #define dY3 v5.2s
107 #define dW0 v6.2s 107 #define dW0 v6.2s
108 #define dW1 v7.2s 108 #define dW1 v7.2s
109 #define dW0Tmp v10.2s 109 #define dW0Tmp v10.2s
110 #define dW1Neg v11.2s 110 #define dW1Neg v11.2s
111 111
112 #define dZip v19.2s 112 #define dZip v19.2s
113 #define dZip8b v19.8b 113 #define dZip8b v19.8b
114 #define half v13.2s 114 #define half v13.2s
115 #define halfs v13.s
115 116
116 .macro FFTSTAGE scaled, inverse, name 117 .macro FFTSTAGE scaled, inverse, name
117 118
118 fmov half, 0.5 119 fmov half, 0.5
119 120
120 asr size, subFFTNum, #1 // preserve the contents of N = subFFTNum 121 asr size, subFFTNum, #1 // preserve the contents of N = subFFTNum
121 lsl step, subFFTNum, #2 // step = N/2 * 8 bytes 122 lsl step, subFFTNum, #2 // step = N/2 * 8 bytes
122 123
123 124
124 // Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]} 125 // Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
125 // Note: W^(k) is stored as negated value and also need to 126 // Note: W^(k) is stored as negated value and also need to
126 // conjugate the values from the table 127 // conjugate the values from the table
127 128
128 // Z(0) : no need of twiddle multiply 129 // Z(0) : no need of twiddle multiply
129 // Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] } 130 // Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
130 131
131 ld1 {dX0},[pSrc],step 132 ld1 {dX0},[pSrc],step
132 ADD pOut1,pOut,step // pOut1 = pOut+ N/2*8 bytes 133 ADD pOut1,pOut,step // pOut1 = pOut+ N/2*8 bytes
133 134
134 ld1 {dX1},[pSrc], #8 135 ld1 {dX1},[pSrc], #8
135 // twStep = 3N/8 * 8 bytes pointing to W^1 136 // twStep = 3N/8 * 8 bytes pointing to W^1
136 SUB twStep,step,size,LSL #1 137 SUB twStep,step,size,LSL #1
137 138
138 lsl step1,size, #2 // step1 = N/4 * 8 = N/2*4 bytes 139 lsl step1,size, #2 // step1 = N/4 * 8 = N/2*4 bytes
139 SUB step1,step1,#8 // (N/4-1)*8 bytes 140 SUB step1,step1,#8 // (N/4-1)*8 bytes
140 141
141 fadd dY0,dX0,dX1 // [b+d | a+c] 142 fadd dY0,dX0,dX1 // [b+d | a+c]
142 fsub dY1,dX0,dX1 // [b-d | a-c] 143 fsub dY1,dX0,dX1 // [b-d | a-c]
143 fmul dY0, dY0, half[0] 144 fmul dY0, dY0, halfs[0]
144 fmul dY1, dY1, half[0] 145 fmul dY1, dY1, halfs[0]
145 146
146 // dY0= [a-c | a+c] ;dY1= [b-d | b+d] 147 // dY0= [a-c | a+c] ;dY1= [b-d | b+d]
147 // VZIP dY0,dY1 148 // VZIP dY0,dY1
148 zip1 dZip,dY0,dY1 149 zip1 dZip,dY0,dY1
149 zip2 dY1,dY0,dY1 150 zip2 dY1,dY0,dY1
150 mov dY08b, dZip8b 151 mov dY08b, dZip8b
151 152
152 fsub dX0,dY0,dY1 153 fsub dX0,dY0,dY1
153 SUBS size,size,#2 154 SUBS size,size,#2
154 fadd dX1,dY0,dY1 155 fadd dX1,dY0,dY1
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 rev64 dX1i,dX1i 195 rev64 dX1i,dX1i
195 SUBS size,size,#4 196 SUBS size,size,#4
196 197
197 198
198 fsub dT2,dX0r,dX1r // a-c 199 fsub dT2,dX0r,dX1r // a-c
199 fadd dT3,dX0i,dX1i // b+d 200 fadd dT3,dX0i,dX1i // b+d
200 fadd dT0,dX0r,dX1r // a+c 201 fadd dT0,dX0r,dX1r // a+c
201 fsub dT1,dX0i,dX1i // b-d 202 fsub dT1,dX0i,dX1i // b-d
202 SUB step1,step1,#8 203 SUB step1,step1,#8
203 204
204 fmul dT2, dT2, half[0] 205 fmul dT2, dT2, halfs[0]
205 fmul dT3, dT3, half[0] 206 fmul dT3, dT3, halfs[0]
206 207
207 fmul dT0, dT0, half[0] 208 fmul dT0, dT0, halfs[0]
208 fmul dT1, dT1, half[0] 209 fmul dT1, dT1, halfs[0]
209 210
210 // VZIP dW1r,dW1i 211 // VZIP dW1r,dW1i
211 // VZIP dW0r,dW0i 212 // VZIP dW0r,dW0i
212 zip1 dZip, dW1r,dW1i 213 zip1 dZip, dW1r,dW1i
213 zip2 dW1i,dW1r,dW1i 214 zip2 dW1i,dW1r,dW1i
214 mov dW1r8b, dZip8b 215 mov dW1r8b, dZip8b
215 zip1 dZip,dW0r,dW0i 216 zip1 dZip,dW0r,dW0i
216 zip2 dW0i,dW0r,dW0i 217 zip2 dW0i,dW0r,dW0i
217 mov dW0r8b, dZip8b 218 mov dW0r8b, dZip8b
218 219
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
271 272
272 decrementScale\name : 273 decrementScale\name :
273 274
274 .endm 275 .endm
275 276
276 M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2,,d15 277 M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2,,d15
277 FFTSTAGE "FALSE","TRUE",Inv 278 FFTSTAGE "FALSE","TRUE",Inv
278 M_END 279 M_END
279 280
280 .end 281 .end
OLDNEW
« no previous file with comments | « dl/sp/src/arm/arm64/ComplexToRealFixup.S ('k') | dl/sp/src/arm/arm64/armSP_FFT_CToC_FC32_Radix2_s.S » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698