Index: dl/sp/src/arm/arm64/ComplexToRealFixup.S |
diff --git a/dl/sp/src/arm/arm64/ComplexToRealFixup.S b/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
index 9b3009351d4ac6322c94989a1c1867354ec10ae7..2580e7e8ce5ef9b6c0301eabb9f906ce9cdb3fcc 100644 |
--- a/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
+++ b/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
@@ -94,6 +94,7 @@ |
#define qT3 v20.2s |
#define half v0.2s |
+#define halfs v0.s |
#define dZip v21.2s |
#define dZip8b v21.8b |
@@ -106,7 +107,7 @@ |
clz order, subFFTNum // N = 2^order |
- RSB order,order,#63 |
+ rsb order,order,#63 |
MOV subFFTSize,subFFTNum // subFFTSize = N/2 |
//MOV subFFTNum,N |
mov argDst, pDst |
@@ -127,7 +128,7 @@ |
MOV zero,#0 |
mov dX0rs[1],zero |
lsl step,subFFTSize, #3 // step = N/2 * 8 bytes |
- mov dX0i[1],zero |
+ mov dX0is[1],zero |
// twStep = 3N/8 * 8 bytes pointing to W^1 |
SUB twStep,step,subFFTSize,LSL #1 |
@@ -185,8 +186,8 @@ evenOddButterflyLoop: |
fadd dT0,dX0r,dX1r // a+c |
fsub dT1,dX0i,dX1i // b-d |
fadd dT3,dX0i,dX1i // b+d |
- fmul dT0,dT0,half[0] |
- fmul dT1,dT1,half[0] |
+ fmul dT0,dT0,halfs[0] |
+ fmul dT1,dT1,halfs[0] |
// VZIP dW1r,dW1i |
// VZIP dW0r,dW0i |
zip1 dZip, dW1r, dW1i |
@@ -208,8 +209,8 @@ evenOddButterflyLoop: |
fmla qT3,dW0i,dT2 |
- fmul dX1r,qT0,half[0] |
- fmul dX1i,qT1,half[0] |
+ fmul dX1r,qT0,halfs[0] |
+ fmul dX1i,qT1,halfs[0] |
fsub dY1r,dT0,dX1i // F(N/2 -1) |
fadd dY1i,dT1,dX1r |
@@ -219,8 +220,8 @@ evenOddButterflyLoop: |
rev64 dY1i,dY1i |
- fmul dX0r,qT2,half[0] |
- fmul dX0i,qT3,half[0] |
+ fmul dX0r,qT2,halfs[0] |
+ fmul dX0i,qT3,halfs[0] |
fsub dY0r,dT0,dX0i // F(1) |
fadd dY0i,dT1,dX0r |