OLD | NEW |
1 // | 1 // |
2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
3 // | 3 // |
4 // Use of this source code is governed by a BSD-style license | 4 // Use of this source code is governed by a BSD-style license |
5 // that can be found in the LICENSE file in the root of the source | 5 // that can be found in the LICENSE file in the root of the source |
6 // tree. An additional intellectual property rights grant can be found | 6 // tree. An additional intellectual property rights grant can be found |
7 // in the file PATENTS. All contributing project authors may | 7 // in the file PATENTS. All contributing project authors may |
8 // be found in the AUTHORS file in the root of the source tree. | 8 // be found in the AUTHORS file in the root of the source tree. |
9 // | 9 // |
10 // This is a modification of | 10 // This is a modification of |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
105 #define dY2 v4.2s | 105 #define dY2 v4.2s |
106 #define dY3 v5.2s | 106 #define dY3 v5.2s |
107 #define dW0 v6.2s | 107 #define dW0 v6.2s |
108 #define dW1 v7.2s | 108 #define dW1 v7.2s |
109 #define dW0Tmp v10.2s | 109 #define dW0Tmp v10.2s |
110 #define dW1Neg v11.2s | 110 #define dW1Neg v11.2s |
111 | 111 |
112 #define dZip v19.2s | 112 #define dZip v19.2s |
113 #define dZip8b v19.8b | 113 #define dZip8b v19.8b |
114 #define half v13.2s | 114 #define half v13.2s |
| 115 #define halfs v13.s |
115 | 116 |
116 .macro FFTSTAGE scaled, inverse, name | 117 .macro FFTSTAGE scaled, inverse, name |
117 | 118 |
118 fmov half, 0.5 | 119 fmov half, 0.5 |
119 | 120 |
120 asr size, subFFTNum, #1 // preserve the contents of N = subFFTNum | 121 asr size, subFFTNum, #1 // preserve the contents of N = subFFTNum |
121 lsl step, subFFTNum, #2 // step = N/2 * 8 bytes | 122 lsl step, subFFTNum, #2 // step = N/2 * 8 bytes |
122 | 123 |
123 | 124 |
124 // Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]} | 125 // Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]} |
125 // Note: W^(k) is stored as negated value and also need to | 126 // Note: W^(k) is stored as negated value and also need to |
126 // conjugate the values from the table | 127 // conjugate the values from the table |
127 | 128 |
128 // Z(0) : no need of twiddle multiply | 129 // Z(0) : no need of twiddle multiply |
129 // Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] } | 130 // Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] } |
130 | 131 |
131 ld1 {dX0},[pSrc],step | 132 ld1 {dX0},[pSrc],step |
132 ADD pOut1,pOut,step // pOut1 = pOut+ N/2*8 bytes | 133 ADD pOut1,pOut,step // pOut1 = pOut+ N/2*8 bytes |
133 | 134 |
134 ld1 {dX1},[pSrc], #8 | 135 ld1 {dX1},[pSrc], #8 |
135 // twStep = 3N/8 * 8 bytes pointing to W^1 | 136 // twStep = 3N/8 * 8 bytes pointing to W^1 |
136 SUB twStep,step,size,LSL #1 | 137 SUB twStep,step,size,LSL #1 |
137 | 138 |
138 lsl step1,size, #2 // step1 = N/4 * 8 = N/2*4 bytes | 139 lsl step1,size, #2 // step1 = N/4 * 8 = N/2*4 bytes |
139 SUB step1,step1,#8 // (N/4-1)*8 bytes | 140 SUB step1,step1,#8 // (N/4-1)*8 bytes |
140 | 141 |
141 fadd dY0,dX0,dX1 // [b+d | a+c] | 142 fadd dY0,dX0,dX1 // [b+d | a+c] |
142 fsub dY1,dX0,dX1 // [b-d | a-c] | 143 fsub dY1,dX0,dX1 // [b-d | a-c] |
143 fmul dY0, dY0, half[0] | 144 fmul dY0, dY0, halfs[0] |
144 fmul dY1, dY1, half[0] | 145 fmul dY1, dY1, halfs[0] |
145 | 146 |
146 // dY0= [a-c | a+c] ;dY1= [b-d | b+d] | 147 // dY0= [a-c | a+c] ;dY1= [b-d | b+d] |
147 // VZIP dY0,dY1 | 148 // VZIP dY0,dY1 |
148 zip1 dZip,dY0,dY1 | 149 zip1 dZip,dY0,dY1 |
149 zip2 dY1,dY0,dY1 | 150 zip2 dY1,dY0,dY1 |
150 mov dY08b, dZip8b | 151 mov dY08b, dZip8b |
151 | 152 |
152 fsub dX0,dY0,dY1 | 153 fsub dX0,dY0,dY1 |
153 SUBS size,size,#2 | 154 SUBS size,size,#2 |
154 fadd dX1,dY0,dY1 | 155 fadd dX1,dY0,dY1 |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
194 rev64 dX1i,dX1i | 195 rev64 dX1i,dX1i |
195 SUBS size,size,#4 | 196 SUBS size,size,#4 |
196 | 197 |
197 | 198 |
198 fsub dT2,dX0r,dX1r // a-c | 199 fsub dT2,dX0r,dX1r // a-c |
199 fadd dT3,dX0i,dX1i // b+d | 200 fadd dT3,dX0i,dX1i // b+d |
200 fadd dT0,dX0r,dX1r // a+c | 201 fadd dT0,dX0r,dX1r // a+c |
201 fsub dT1,dX0i,dX1i // b-d | 202 fsub dT1,dX0i,dX1i // b-d |
202 SUB step1,step1,#8 | 203 SUB step1,step1,#8 |
203 | 204 |
204 fmul dT2, dT2, half[0] | 205 fmul dT2, dT2, halfs[0] |
205 fmul dT3, dT3, half[0] | 206 fmul dT3, dT3, halfs[0] |
206 | 207 |
207 fmul dT0, dT0, half[0] | 208 fmul dT0, dT0, halfs[0] |
208 fmul dT1, dT1, half[0] | 209 fmul dT1, dT1, halfs[0] |
209 | 210 |
210 // VZIP dW1r,dW1i | 211 // VZIP dW1r,dW1i |
211 // VZIP dW0r,dW0i | 212 // VZIP dW0r,dW0i |
212 zip1 dZip, dW1r,dW1i | 213 zip1 dZip, dW1r,dW1i |
213 zip2 dW1i,dW1r,dW1i | 214 zip2 dW1i,dW1r,dW1i |
214 mov dW1r8b, dZip8b | 215 mov dW1r8b, dZip8b |
215 zip1 dZip,dW0r,dW0i | 216 zip1 dZip,dW0r,dW0i |
216 zip2 dW0i,dW0r,dW0i | 217 zip2 dW0i,dW0r,dW0i |
217 mov dW0r8b, dZip8b | 218 mov dW0r8b, dZip8b |
218 | 219 |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
271 | 272 |
272 decrementScale\name : | 273 decrementScale\name : |
273 | 274 |
274 .endm | 275 .endm |
275 | 276 |
276 M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2,,d15 | 277 M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2,,d15 |
277 FFTSTAGE "FALSE","TRUE",Inv | 278 FFTSTAGE "FALSE","TRUE",Inv |
278 M_END | 279 M_END |
279 | 280 |
280 .end | 281 .end |
OLD | NEW |