Index: dl/sp/src/arm/arm64/ComplexToRealFixup.S |
diff --git a/dl/sp/src/arm/arm64/ComplexToRealFixup.S b/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
index 9b3009351d4ac6322c94989a1c1867354ec10ae7..c362fd87ae6b1f62956bb37fb3005637eb8f1923 100644 |
--- a/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
+++ b/dl/sp/src/arm/arm64/ComplexToRealFixup.S |
@@ -93,7 +93,7 @@ |
#define qT2 v18.2s |
#define qT3 v20.2s |
-#define half v0.2s |
+#define half v0.s |
#define dZip v21.2s |
#define dZip8b v21.8b |
@@ -106,7 +106,7 @@ |
clz order, subFFTNum // N = 2^order |
- RSB order,order,#63 |
+ rsb order,order,#63 |
MOV subFFTSize,subFFTNum // subFFTSize = N/2 |
//MOV subFFTNum,N |
mov argDst, pDst |
@@ -127,7 +127,7 @@ |
MOV zero,#0 |
mov dX0rs[1],zero |
lsl step,subFFTSize, #3 // step = N/2 * 8 bytes |
- mov dX0i[1],zero |
+ mov dX0is[1],zero |
// twStep = 3N/8 * 8 bytes pointing to W^1 |
SUB twStep,step,subFFTSize,LSL #1 |
@@ -154,7 +154,7 @@ |
// Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1) |
// since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1) |
- fmov half, #0.5 |
+ fmov v0.2s, #0.5 |
Raymond Toy (Google)
2015/10/30 22:38:46
I think you should change the definition of half i […]
Riku Voipio
2015/11/02 12:24:12
Ok, I'll define half = v0.2s, and halfs = v0.s to […]
|
evenOddButterflyLoop: |