OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 26 matching lines...) Expand all Loading... | |
37 const float pi2 = 6.28318530717959f; | 37 const float pi2 = 6.28318530717959f; |
38 const float pi2t = pi2 / 32768; | 38 const float pi2t = pi2 / 32768; |
39 | 39 |
40 // Generate a uniform random array on [0 1] | 40 // Generate a uniform random array on [0 1] |
41 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); | 41 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); |
42 | 42 |
43 int16_t* randWptr = randW16; | 43 int16_t* randWptr = randW16; |
44 float randTemp, randTemp2, randTemp3, randTemp4; | 44 float randTemp, randTemp2, randTemp3, randTemp4; |
45 int32_t tmp1s, tmp2s, tmp3s, tmp4s; | 45 int32_t tmp1s, tmp2s, tmp3s, tmp4s; |
46 | 46 |
47 for (i = 0; i < PART_LEN; i+=4) { | 47 for (i = 0; i < PART_LEN; i += 4) { |
48 __asm __volatile ( | 48 __asm __volatile( |
49 ".set push \n\t" | 49 ".set push \n\t" |
50 ".set noreorder \n\t" | 50 ".set noreorder \n\t" |
51 "lh %[tmp1s], 0(%[randWptr]) \n\t" | 51 "lh %[tmp1s], 0(%[randWptr]) \n\t" |
52 "lh %[tmp2s], 2(%[randWptr]) \n\t" | 52 "lh %[tmp2s], 2(%[randWptr]) \n\t" |
53 "lh %[tmp3s], 4(%[randWptr]) \n\t" | 53 "lh %[tmp3s], 4(%[randWptr]) \n\t" |
54 "lh %[tmp4s], 6(%[randWptr]) \n\t" | 54 "lh %[tmp4s], 6(%[randWptr]) \n\t" |
55 "mtc1 %[tmp1s], %[randTemp] \n\t" | 55 "mtc1 %[tmp1s], %[randTemp] \n\t" |
56 "mtc1 %[tmp2s], %[randTemp2] \n\t" | 56 "mtc1 %[tmp2s], %[randTemp2] \n\t" |
57 "mtc1 %[tmp3s], %[randTemp3] \n\t" | 57 "mtc1 %[tmp3s], %[randTemp3] \n\t" |
58 "mtc1 %[tmp4s], %[randTemp4] \n\t" | 58 "mtc1 %[tmp4s], %[randTemp4] \n\t" |
59 "cvt.s.w %[randTemp], %[randTemp] \n\t" | 59 "cvt.s.w %[randTemp], %[randTemp] \n\t" |
60 "cvt.s.w %[randTemp2], %[randTemp2] \n\t" | 60 "cvt.s.w %[randTemp2], %[randTemp2] \n\t" |
61 "cvt.s.w %[randTemp3], %[randTemp3] \n\t" | 61 "cvt.s.w %[randTemp3], %[randTemp3] \n\t" |
62 "cvt.s.w %[randTemp4], %[randTemp4] \n\t" | 62 "cvt.s.w %[randTemp4], %[randTemp4] \n\t" |
63 "addiu %[randWptr], %[randWptr], 8 \n\t" | 63 "addiu %[randWptr], %[randWptr], 8 \n\t" |
64 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" | 64 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" |
65 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" | 65 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" |
66 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" | 66 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" |
67 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" | 67 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" |
68 ".set pop \n\t" | 68 ".set pop \n\t" |
69 : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), | 69 : [randWptr] "+r"(randWptr), [randTemp] "=&f"(randTemp), |
70 [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), | 70 [randTemp2] "=&f"(randTemp2), [randTemp3] "=&f"(randTemp3), |
71 [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), | 71 [randTemp4] "=&f"(randTemp4), [tmp1s] "=&r"(tmp1s), |
72 [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), | 72 [tmp2s] "=&r"(tmp2s), [tmp3s] "=&r"(tmp3s), [tmp4s] "=&r"(tmp4s) |
73 [tmp4s] "=&r" (tmp4s) | 73 : [pi2t] "f"(pi2t) |
74 : [pi2t] "f" (pi2t) | 74 : "memory"); |
75 : "memory" | |
76 ); | |
77 | 75 |
78 u[i+1][0] = cosf(randTemp); | 76 u[i + 1][0] = cosf(randTemp); |
79 u[i+1][1] = sinf(randTemp); | 77 u[i + 1][1] = sinf(randTemp); |
80 u[i+2][0] = cosf(randTemp2); | 78 u[i + 2][0] = cosf(randTemp2); |
81 u[i+2][1] = sinf(randTemp2); | 79 u[i + 2][1] = sinf(randTemp2); |
82 u[i+3][0] = cosf(randTemp3); | 80 u[i + 3][0] = cosf(randTemp3); |
83 u[i+3][1] = sinf(randTemp3); | 81 u[i + 3][1] = sinf(randTemp3); |
84 u[i+4][0] = cosf(randTemp4); | 82 u[i + 4][0] = cosf(randTemp4); |
85 u[i+4][1] = sinf(randTemp4); | 83 u[i + 4][1] = sinf(randTemp4); |
86 } | 84 } |
87 | 85 |
88 // Reject LF noise | 86 // Reject LF noise |
89 float* u_ptr = &u[1][0]; | 87 float* u_ptr = &u[1][0]; |
90 float noise2, noise3, noise4; | 88 float noise2, noise3, noise4; |
91 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; | 89 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; |
92 | 90 |
93 u[0][0] = 0; | 91 u[0][0] = 0; |
94 u[0][1] = 0; | 92 u[0][1] = 0; |
95 for (i = 1; i < PART_LEN1; i+=4) { | 93 for (i = 1; i < PART_LEN1; i += 4) { |
96 __asm __volatile ( | 94 __asm __volatile( |
97 ".set push \n\t" | 95 ".set push \n\t" |
98 ".set noreorder \n\t" | 96 ".set noreorder \n\t" |
99 "lwc1 %[noise], 4(%[noisePow]) \n\t" | 97 "lwc1 %[noise], 4(%[noisePow]) \n\t" |
100 "lwc1 %[noise2], 8(%[noisePow]) \n\t" | 98 "lwc1 %[noise2], 8(%[noisePow]) \n\t" |
101 "lwc1 %[noise3], 12(%[noisePow]) \n\t" | 99 "lwc1 %[noise3], 12(%[noisePow]) \n\t" |
102 "lwc1 %[noise4], 16(%[noisePow]) \n\t" | 100 "lwc1 %[noise4], 16(%[noisePow]) \n\t" |
103 "sqrt.s %[noise], %[noise] \n\t" | 101 "sqrt.s %[noise], %[noise] \n\t" |
104 "sqrt.s %[noise2], %[noise2] \n\t" | 102 "sqrt.s %[noise2], %[noise2] \n\t" |
105 "sqrt.s %[noise3], %[noise3] \n\t" | 103 "sqrt.s %[noise3], %[noise3] \n\t" |
106 "sqrt.s %[noise4], %[noise4] \n\t" | 104 "sqrt.s %[noise4], %[noise4] \n\t" |
107 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" | 105 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" |
108 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" | 106 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" |
109 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" | 107 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" |
110 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" | 108 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" |
111 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" | 109 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" |
112 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" | 110 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" |
113 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" | 111 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" |
114 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" | 112 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" |
115 "addiu %[noisePow], %[noisePow], 16 \n\t" | 113 "addiu %[noisePow], %[noisePow], 16 \n\t" |
116 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" | 114 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" |
117 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" | 115 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" |
118 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" | 116 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" |
119 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" | 117 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" |
120 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" | 118 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" |
121 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" | 119 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" |
122 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" | 120 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" |
123 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" | 121 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" |
124 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" | 122 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" |
125 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" | 123 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" |
126 "neg.s %[tmp2f] \n\t" | 124 "neg.s %[tmp2f] \n\t" |
127 "neg.s %[tmp4f] \n\t" | 125 "neg.s %[tmp4f] \n\t" |
128 "neg.s %[tmp6f] \n\t" | 126 "neg.s %[tmp6f] \n\t" |
129 "neg.s %[tmp8f] \n\t" | 127 "neg.s %[tmp8f] \n\t" |
130 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" | 128 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" |
131 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" | 129 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" |
132 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" | 130 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" |
133 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" | 131 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" |
134 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" | 132 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" |
135 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" | 133 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" |
136 "addiu %[u_ptr], %[u_ptr], 32 \n\t" | 134 "addiu %[u_ptr], %[u_ptr], 32 \n\t" |
137 ".set pop \n\t" | 135 ".set pop \n\t" |
138 : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), | 136 : [u_ptr] "+r"(u_ptr), [noisePow] "+r"(noisePow), [noise] "=&f"(noise), |
139 [noise] "=&f" (noise), [noise2] "=&f" (noise2), | 137 [noise2] "=&f"(noise2), [noise3] "=&f"(noise3), |
140 [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), | 138 [noise4] "=&f"(noise4), [tmp1f] "=&f"(tmp1f), [tmp2f] "=&f"(tmp2f), |
141 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), | 139 [tmp3f] "=&f"(tmp3f), [tmp4f] "=&f"(tmp4f), [tmp5f] "=&f"(tmp5f), |
142 [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), | 140 [tmp6f] "=&f"(tmp6f), [tmp7f] "=&f"(tmp7f), [tmp8f] "=&f"(tmp8f) |
143 [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), | 141 : |
144 [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) | 142 : "memory"); |
145 : | |
146 : "memory" | |
147 ); | |
148 } | 143 } |
149 u[PART_LEN][1] = 0; | 144 u[PART_LEN][1] = 0; |
150 noisePow -= PART_LEN; | 145 noisePow -= PART_LEN; |
151 | 146 |
152 u_ptr = &u[0][0]; | 147 u_ptr = &u[0][0]; |
153 float* u_ptr_end = &u[PART_LEN][0]; | 148 float* u_ptr_end = &u[PART_LEN][0]; |
154 float* efw_ptr_0 = &efw[0][0]; | 149 float* efw_ptr_0 = &efw[0][0]; |
155 float* efw_ptr_1 = &efw[1][0]; | 150 float* efw_ptr_1 = &efw[1][0]; |
156 float tmp9f, tmp10f; | 151 float tmp9f, tmp10f; |
157 const float tmp1c = 1.0; | 152 const float tmp1c = 1.0; |
158 | 153 |
159 __asm __volatile ( | 154 __asm __volatile( |
160 ".set push \n\t" | 155 ".set push " |
161 ".set noreorder \n\t" | 156 "\n\t" |
162 "1: \n\t" | 157 ".set noreorder " |
163 "lwc1 %[tmp1f], 0(%[lambda]) \n\t" | 158 "\n\t" |
164 "lwc1 %[tmp6f], 4(%[lambda]) \n\t" | 159 "1: " |
165 "addiu %[lambda], %[lambda], 8 \n\t" | 160 "\n\t" |
166 "c.lt.s %[tmp1f], %[tmp1c] \n\t" | 161 "lwc1 %[tmp1f], 0(%[lambda]) " |
167 "bc1f 4f \n\t" | 162 "\n\t" |
168 " nop \n\t" | 163 "lwc1 %[tmp6f], 4(%[lambda]) " |
169 "c.lt.s %[tmp6f], %[tmp1c] \n\t" | 164 "\n\t" |
170 "bc1f 3f \n\t" | 165 "addiu %[lambda], %[lambda], 8 " |
171 " nop \n\t" | 166 "\n\t" |
172 "2: \n\t" | 167 "c.lt.s %[tmp1f], %[tmp1c] " |
173 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" | 168 "\n\t" |
174 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" | 169 "bc1f 4f " |
175 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" | 170 "\n\t" |
176 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" | 171 " nop " |
177 "sqrt.s %[tmp1f], %[tmp1f] \n\t" | 172 "\n\t" |
178 "sqrt.s %[tmp6f], %[tmp6f] \n\t" | 173 "c.lt.s %[tmp6f], %[tmp1c] " |
179 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | 174 "\n\t" |
180 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" | 175 "bc1f 3f " |
181 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | 176 "\n\t" |
182 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" | 177 " nop " |
183 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | 178 "\n\t" |
184 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" | 179 "2: " |
185 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | 180 "\n\t" |
186 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" | 181 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] " |
182 "\n\t" | |
183 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] " | |
184 "\n\t" | |
185 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] " | |
186 "\n\t" | |
187 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] " | |
188 "\n\t" | |
189 "sqrt.s %[tmp1f], %[tmp1f] " | |
190 "\n\t" | |
191 "sqrt.s %[tmp6f], %[tmp6f] " | |
192 "\n\t" | |
193 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) " | |
194 "\n\t" | |
195 "lwc1 %[tmp3f], 0(%[u_ptr]) " | |
196 "\n\t" | |
197 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) " | |
198 "\n\t" | |
199 "lwc1 %[tmp8f], 8(%[u_ptr]) " | |
200 "\n\t" | |
201 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) " | |
202 "\n\t" | |
203 "lwc1 %[tmp5f], 4(%[u_ptr]) " | |
204 "\n\t" | |
205 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) " | |
206 "\n\t" | |
207 "lwc1 %[tmp10f], 12(%[u_ptr]) " | |
208 "\n\t" | |
187 #if !defined(MIPS32_R2_LE) | 209 #if !defined(MIPS32_R2_LE) |
188 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" | 210 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] " |
189 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" | 211 "\n\t" |
190 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" | 212 "add.s %[tmp2f], %[tmp2f], %[tmp3f] " |
191 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" | 213 "\n\t" |
192 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" | 214 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] " |
193 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" | 215 "\n\t" |
194 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" | 216 "add.s %[tmp4f], %[tmp4f], %[tmp3f] " |
195 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" | 217 "\n\t" |
196 #else // #if !defined(MIPS32_R2_LE) | 218 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] " |
197 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" | 219 "\n\t" |
198 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" | 220 "add.s %[tmp7f], %[tmp7f], %[tmp3f] " |
199 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" | 221 "\n\t" |
200 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" | 222 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] " |
201 #endif // #if !defined(MIPS32_R2_LE) | 223 "\n\t" |
202 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | 224 "add.s %[tmp9f], %[tmp9f], %[tmp3f] " |
203 "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | 225 "\n\t" |
204 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | 226 #else // #if !defined(MIPS32_R2_LE) |
205 "b 5f \n\t" | 227 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] " |
206 " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | 228 "\n\t" |
207 "3: \n\t" | 229 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] " |
208 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" | 230 "\n\t" |
209 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" | 231 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] " |
210 "sqrt.s %[tmp1f], %[tmp1f] \n\t" | 232 "\n\t" |
211 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | 233 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] " |
212 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" | 234 "\n\t" |
213 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | 235 #endif // #if !defined(MIPS32_R2_LE) |
214 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" | 236 "swc1 %[tmp2f], 0(%[efw_ptr_0]) " |
237 "\n\t" | |
238 "swc1 %[tmp4f], 0(%[efw_ptr_1]) " | |
239 "\n\t" | |
240 "swc1 %[tmp7f], 4(%[efw_ptr_0]) " | |
241 "\n\t" | |
242 "b 5f " | |
243 "\n\t" | |
244 " swc1 %[tmp9f], 4(%[efw_ptr_1]) " | |
245 "\n\t" | |
246 "3: " | |
247 "\n\t" | |
248 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] " | |
249 "\n\t" | |
250 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] " | |
251 "\n\t" | |
252 "sqrt.s %[tmp1f], %[tmp1f] " | |
253 "\n\t" | |
254 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) " | |
255 "\n\t" | |
256 "lwc1 %[tmp3f], 0(%[u_ptr]) " | |
257 "\n\t" | |
258 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) " | |
259 "\n\t" | |
260 "lwc1 %[tmp5f], 4(%[u_ptr]) " | |
261 "\n\t" | |
215 #if !defined(MIPS32_R2_LE) | 262 #if !defined(MIPS32_R2_LE) |
216 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" | 263 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] " |
217 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" | 264 "\n\t" |
218 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" | 265 "add.s %[tmp2f], %[tmp2f], %[tmp3f] " |
219 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" | 266 "\n\t" |
220 #else // #if !defined(MIPS32_R2_LE) | 267 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] " |
221 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" | 268 "\n\t" |
222 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" | 269 "add.s %[tmp4f], %[tmp4f], %[tmp3f] " |
223 #endif // #if !defined(MIPS32_R2_LE) | 270 "\n\t" |
224 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | 271 #else // #if !defined(MIPS32_R2_LE) |
225 "b 5f \n\t" | 272 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] " |
226 " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | 273 "\n\t" |
227 "4: \n\t" | 274 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] " |
228 "c.lt.s %[tmp6f], %[tmp1c] \n\t" | 275 "\n\t" |
229 "bc1f 5f \n\t" | 276 #endif // #if !defined(MIPS32_R2_LE) |
230 " nop \n\t" | 277 "swc1 %[tmp2f], 0(%[efw_ptr_0]) " |
231 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" | 278 "\n\t" |
232 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" | 279 "b 5f " |
233 "sqrt.s %[tmp6f], %[tmp6f] \n\t" | 280 "\n\t" |
234 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | 281 " swc1 %[tmp4f], 0(%[efw_ptr_1]) " |
235 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" | 282 "\n\t" |
236 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | 283 "4: " |
237 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" | 284 "\n\t" |
285 "c.lt.s %[tmp6f], %[tmp1c] " | |
286 "\n\t" | |
287 "bc1f 5f " | |
288 "\n\t" | |
289 " nop " | |
290 "\n\t" | |
291 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] " | |
292 "\n\t" | |
293 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] " | |
294 "\n\t" | |
295 "sqrt.s %[tmp6f], %[tmp6f] " | |
296 "\n\t" | |
297 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) " | |
298 "\n\t" | |
299 "lwc1 %[tmp8f], 8(%[u_ptr]) " | |
300 "\n\t" | |
301 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) " | |
302 "\n\t" | |
303 "lwc1 %[tmp10f], 12(%[u_ptr]) " | |
304 "\n\t" | |
238 #if !defined(MIPS32_R2_LE) | 305 #if !defined(MIPS32_R2_LE) |
239 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" | 306 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] " |
240 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" | 307 "\n\t" |
241 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" | 308 "add.s %[tmp7f], %[tmp7f], %[tmp3f] " |
242 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" | 309 "\n\t" |
243 #else // #if !defined(MIPS32_R2_LE) | 310 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] " |
244 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" | 311 "\n\t" |
245 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" | 312 "add.s %[tmp9f], %[tmp9f], %[tmp3f] " |
246 #endif // #if !defined(MIPS32_R2_LE) | 313 "\n\t" |
247 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | 314 #else // #if !defined(MIPS32_R2_LE) |
248 "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | 315 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] " |
249 "5: \n\t" | 316 "\n\t" |
250 "addiu %[u_ptr], %[u_ptr], 16 \n\t" | 317 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] " |
251 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" | 318 "\n\t" |
252 "bne %[u_ptr], %[u_ptr_end], 1b \n\t" | 319 #endif // #if !defined(MIPS32_R2_LE) |
253 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" | 320 "swc1 %[tmp7f], 4(%[efw_ptr_0]) " |
254 ".set pop \n\t" | 321 "\n\t" |
255 : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), | 322 "swc1 %[tmp9f], 4(%[efw_ptr_1]) " |
256 [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), | 323 "\n\t" |
257 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), | 324 "5: " |
258 [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), | 325 "\n\t" |
259 [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), | 326 "addiu %[u_ptr], %[u_ptr], 16 " |
260 [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) | 327 "\n\t" |
261 : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) | 328 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 " |
262 : "memory" | 329 "\n\t" |
263 ); | 330 "bne %[u_ptr], %[u_ptr_end], 1b " |
331 "\n\t" | |
332 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 " | |
333 "\n\t" | |
334 ".set pop " | |
335 "\n\t" | |
336 : [lambda] "+r"(lambda), [u_ptr] "+r"(u_ptr), [efw_ptr_0] "+r"(efw_ptr_0), | |
337 [efw_ptr_1] "+r"(efw_ptr_1), [tmp1f] "=&f"(tmp1f), [tmp2f] "=&f"(tmp2f), | |
338 [tmp3f] "=&f"(tmp3f), [tmp4f] "=&f"(tmp4f), [tmp5f] "=&f"(tmp5f), | |
339 [tmp6f] "=&f"(tmp6f), [tmp7f] "=&f"(tmp7f), [tmp8f] "=&f"(tmp8f), | |
340 [tmp9f] "=&f"(tmp9f), [tmp10f] "=&f"(tmp10f) | |
341 : [tmp1c] "f"(tmp1c), [u_ptr_end] "r"(u_ptr_end) | |
342 : "memory"); | |
kwiberg-webrtc
2016/01/27 14:08:21
Ouch. May I ask for a follow-up CL that makes sure
| |
264 | 343 |
265 lambda -= PART_LEN; | 344 lambda -= PART_LEN; |
266 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); | 345 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); |
267 //tmp = 1 - lambda[i]; | 346 // tmp = 1 - lambda[i]; |
268 efw[0][PART_LEN] += tmp * u[PART_LEN][0]; | 347 efw[0][PART_LEN] += tmp * u[PART_LEN][0]; |
269 efw[1][PART_LEN] += tmp * u[PART_LEN][1]; | 348 efw[1][PART_LEN] += tmp * u[PART_LEN][1]; |
270 | 349 |
271 // For H band comfort noise | 350 // For H band comfort noise |
272 // TODO: don't compute noise and "tmp" twice. Use the previous results. | 351 // TODO: don't compute noise and "tmp" twice. Use the previous results. |
273 noiseAvg = 0.0; | 352 noiseAvg = 0.0; |
274 tmpAvg = 0.0; | 353 tmpAvg = 0.0; |
275 num = 0; | 354 num = 0; |
276 if (aec->num_bands > 1) { | 355 if (aec->num_bands > 1) { |
277 for (i = 0; i < PART_LEN; i++) { | 356 for (i = 0; i < PART_LEN; i++) { |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
326 int num_partitions, | 405 int num_partitions, |
327 int x_fft_buf_block_pos, | 406 int x_fft_buf_block_pos, |
328 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | 407 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], |
329 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | 408 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], |
330 float y_fft[2][PART_LEN1]) { | 409 float y_fft[2][PART_LEN1]) { |
331 int i; | 410 int i; |
332 for (i = 0; i < num_partitions; i++) { | 411 for (i = 0; i < num_partitions; i++) { |
333 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; | 412 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; |
334 int pos = i * PART_LEN1; | 413 int pos = i * PART_LEN1; |
335 // Check for wrap | 414 // Check for wrap |
336 if (i + x_fft_buf_block_pos >= num_partitions) { | 415 if (i + x_fft_buf_block_pos >= num_partitions) { |
337 xPos -= num_partitions * (PART_LEN1); | 416 xPos -= num_partitions * (PART_LEN1); |
338 } | 417 } |
339 float* yf0 = y_fft[0]; | 418 float* yf0 = y_fft[0]; |
340 float* yf1 = y_fft[1]; | 419 float* yf1 = y_fft[1]; |
341 float* aRe = x_fft_buf[0] + xPos; | 420 float* aRe = x_fft_buf[0] + xPos; |
342 float* aIm = x_fft_buf[1] + xPos; | 421 float* aIm = x_fft_buf[1] + xPos; |
343 float* bRe = h_fft_buf[0] + pos; | 422 float* bRe = h_fft_buf[0] + pos; |
344 float* bIm = h_fft_buf[1] + pos; | 423 float* bIm = h_fft_buf[1] + pos; |
345 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; | 424 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; |
346 int len = PART_LEN1 >> 1; | 425 int len = PART_LEN1 >> 1; |
347 | 426 |
348 __asm __volatile ( | 427 __asm __volatile( |
349 ".set push \n\t" | 428 ".set push \n\t" |
350 ".set noreorder \n\t" | 429 ".set noreorder \n\t" |
351 "1: \n\t" | 430 "1: \n\t" |
352 "lwc1 %[f0], 0(%[aRe]) \n\t" | 431 "lwc1 %[f0], 0(%[aRe]) \n\t" |
353 "lwc1 %[f1], 0(%[bRe]) \n\t" | 432 "lwc1 %[f1], 0(%[bRe]) \n\t" |
354 "lwc1 %[f2], 0(%[bIm]) \n\t" | 433 "lwc1 %[f2], 0(%[bIm]) \n\t" |
355 "lwc1 %[f3], 0(%[aIm]) \n\t" | 434 "lwc1 %[f3], 0(%[aIm]) \n\t" |
356 "lwc1 %[f4], 4(%[aRe]) \n\t" | 435 "lwc1 %[f4], 4(%[aRe]) \n\t" |
357 "lwc1 %[f5], 4(%[bRe]) \n\t" | 436 "lwc1 %[f5], 4(%[bRe]) \n\t" |
358 "lwc1 %[f6], 4(%[bIm]) \n\t" | 437 "lwc1 %[f6], 4(%[bIm]) \n\t" |
359 "mul.s %[f8], %[f0], %[f1] \n\t" | 438 "mul.s %[f8], %[f0], %[f1] \n\t" |
360 "mul.s %[f0], %[f0], %[f2] \n\t" | 439 "mul.s %[f0], %[f0], %[f2] \n\t" |
361 "mul.s %[f9], %[f4], %[f5] \n\t" | 440 "mul.s %[f9], %[f4], %[f5] \n\t" |
362 "mul.s %[f4], %[f4], %[f6] \n\t" | 441 "mul.s %[f4], %[f4], %[f6] \n\t" |
363 "lwc1 %[f7], 4(%[aIm]) \n\t" | 442 "lwc1 %[f7], 4(%[aIm]) \n\t" |
364 #if !defined(MIPS32_R2_LE) | 443 #if !defined(MIPS32_R2_LE) |
365 "mul.s %[f12], %[f2], %[f3] \n\t" | 444 "mul.s %[f12], %[f2], %[f3] \n\t" |
366 "mul.s %[f1], %[f3], %[f1] \n\t" | 445 "mul.s %[f1], %[f3], %[f1] \n\t" |
367 "mul.s %[f11], %[f6], %[f7] \n\t" | 446 "mul.s %[f11], %[f6], %[f7] \n\t" |
368 "addiu %[aRe], %[aRe], 8 \n\t" | 447 "addiu %[aRe], %[aRe], 8 \n\t" |
369 "addiu %[aIm], %[aIm], 8 \n\t" | 448 "addiu %[aIm], %[aIm], 8 \n\t" |
370 "addiu %[len], %[len], -1 \n\t" | 449 "addiu %[len], %[len], -1 \n\t" |
371 "sub.s %[f8], %[f8], %[f12] \n\t" | 450 "sub.s %[f8], %[f8], %[f12] \n\t" |
372 "mul.s %[f12], %[f7], %[f5] \n\t" | 451 "mul.s %[f12], %[f7], %[f5] \n\t" |
373 "lwc1 %[f2], 0(%[yf0]) \n\t" | 452 "lwc1 %[f2], 0(%[yf0]) \n\t" |
374 "add.s %[f1], %[f0], %[f1] \n\t" | 453 "add.s %[f1], %[f0], %[f1] \n\t" |
375 "lwc1 %[f3], 0(%[yf1]) \n\t" | 454 "lwc1 %[f3], 0(%[yf1]) \n\t" |
376 "sub.s %[f9], %[f9], %[f11] \n\t" | 455 "sub.s %[f9], %[f9], %[f11] \n\t" |
377 "lwc1 %[f6], 4(%[yf0]) \n\t" | 456 "lwc1 %[f6], 4(%[yf0]) \n\t" |
378 "add.s %[f4], %[f4], %[f12] \n\t" | 457 "add.s %[f4], %[f4], %[f12] \n\t" |
379 #else // #if !defined(MIPS32_R2_LE) | 458 #else // #if !defined(MIPS32_R2_LE) |
380 "addiu %[aRe], %[aRe], 8 \n\t" | 459 "addiu %[aRe], %[aRe], 8 \n\t" |
381 "addiu %[aIm], %[aIm], 8 \n\t" | 460 "addiu %[aIm], %[aIm], 8 \n\t" |
382 "addiu %[len], %[len], -1 \n\t" | 461 "addiu %[len], %[len], -1 \n\t" |
383 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" | 462 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" |
384 "lwc1 %[f2], 0(%[yf0]) \n\t" | 463 "lwc1 %[f2], 0(%[yf0]) \n\t" |
385 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" | 464 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" |
386 "lwc1 %[f3], 0(%[yf1]) \n\t" | 465 "lwc1 %[f3], 0(%[yf1]) \n\t" |
387 "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" | 466 "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" |
388 "lwc1 %[f6], 4(%[yf0]) \n\t" | 467 "lwc1 %[f6], 4(%[yf0]) \n\t" |
389 "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" | 468 "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" |
390 #endif // #if !defined(MIPS32_R2_LE) | 469 #endif // #if !defined(MIPS32_R2_LE) |
391 "lwc1 %[f5], 4(%[yf1]) \n\t" | 470 "lwc1 %[f5], 4(%[yf1]) \n\t" |
392 "add.s %[f2], %[f2], %[f8] \n\t" | 471 "add.s %[f2], %[f2], %[f8] \n\t" |
393 "addiu %[bRe], %[bRe], 8 \n\t" | 472 "addiu %[bRe], %[bRe], 8 \n\t" |
394 "addiu %[bIm], %[bIm], 8 \n\t" | 473 "addiu %[bIm], %[bIm], 8 \n\t" |
395 "add.s %[f3], %[f3], %[f1] \n\t" | 474 "add.s %[f3], %[f3], %[f1] \n\t" |
396 "add.s %[f6], %[f6], %[f9] \n\t" | 475 "add.s %[f6], %[f6], %[f9] \n\t" |
397 "add.s %[f5], %[f5], %[f4] \n\t" | 476 "add.s %[f5], %[f5], %[f4] \n\t" |
398 "swc1 %[f2], 0(%[yf0]) \n\t" | 477 "swc1 %[f2], 0(%[yf0]) \n\t" |
399 "swc1 %[f3], 0(%[yf1]) \n\t" | 478 "swc1 %[f3], 0(%[yf1]) \n\t" |
400 "swc1 %[f6], 4(%[yf0]) \n\t" | 479 "swc1 %[f6], 4(%[yf0]) \n\t" |
401 "swc1 %[f5], 4(%[yf1]) \n\t" | 480 "swc1 %[f5], 4(%[yf1]) \n\t" |
402 "addiu %[yf0], %[yf0], 8 \n\t" | 481 "addiu %[yf0], %[yf0], 8 \n\t" |
403 "bgtz %[len], 1b \n\t" | 482 "bgtz %[len], 1b \n\t" |
404 " addiu %[yf1], %[yf1], 8 \n\t" | 483 " addiu %[yf1], %[yf1], 8 \n\t" |
405 "lwc1 %[f0], 0(%[aRe]) \n\t" | 484 "lwc1 %[f0], 0(%[aRe]) \n\t" |
406 "lwc1 %[f1], 0(%[bRe]) \n\t" | 485 "lwc1 %[f1], 0(%[bRe]) \n\t" |
407 "lwc1 %[f2], 0(%[bIm]) \n\t" | 486 "lwc1 %[f2], 0(%[bIm]) \n\t" |
408 "lwc1 %[f3], 0(%[aIm]) \n\t" | 487 "lwc1 %[f3], 0(%[aIm]) \n\t" |
409 "mul.s %[f8], %[f0], %[f1] \n\t" | 488 "mul.s %[f8], %[f0], %[f1] \n\t" |
410 "mul.s %[f0], %[f0], %[f2] \n\t" | 489 "mul.s %[f0], %[f0], %[f2] \n\t" |
411 #if !defined(MIPS32_R2_LE) | 490 #if !defined(MIPS32_R2_LE) |
412 "mul.s %[f12], %[f2], %[f3] \n\t" | 491 "mul.s %[f12], %[f2], %[f3] \n\t" |
413 "mul.s %[f1], %[f3], %[f1] \n\t" | 492 "mul.s %[f1], %[f3], %[f1] \n\t" |
414 "sub.s %[f8], %[f8], %[f12] \n\t" | 493 "sub.s %[f8], %[f8], %[f12] \n\t" |
415 "lwc1 %[f2], 0(%[yf0]) \n\t" | 494 "lwc1 %[f2], 0(%[yf0]) \n\t" |
416 "add.s %[f1], %[f0], %[f1] \n\t" | 495 "add.s %[f1], %[f0], %[f1] \n\t" |
417 "lwc1 %[f3], 0(%[yf1]) \n\t" | 496 "lwc1 %[f3], 0(%[yf1]) \n\t" |
418 #else // #if !defined(MIPS32_R2_LE) | 497 #else // #if !defined(MIPS32_R2_LE) |
419 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" | 498 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" |
420 "lwc1 %[f2], 0(%[yf0]) \n\t" | 499 "lwc1 %[f2], 0(%[yf0]) \n\t" |
421 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" | 500 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" |
422 "lwc1 %[f3], 0(%[yf1]) \n\t" | 501 "lwc1 %[f3], 0(%[yf1]) \n\t" |
423 #endif // #if !defined(MIPS32_R2_LE) | 502 #endif // #if !defined(MIPS32_R2_LE) |
424 "add.s %[f2], %[f2], %[f8] \n\t" | 503 "add.s %[f2], %[f2], %[f8] \n\t" |
425 "add.s %[f3], %[f3], %[f1] \n\t" | 504 "add.s %[f3], %[f3], %[f1] \n\t" |
426 "swc1 %[f2], 0(%[yf0]) \n\t" | 505 "swc1 %[f2], 0(%[yf0]) \n\t" |
427 "swc1 %[f3], 0(%[yf1]) \n\t" | 506 "swc1 %[f3], 0(%[yf1]) \n\t" |
428 ".set pop \n\t" | 507 ".set pop \n\t" |
429 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | 508 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), |
430 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | 509 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), |
431 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), | 510 [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11), |
432 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), | 511 [f12] "=&f"(f12), [f13] "=&f"(f13), [aRe] "+r"(aRe), [aIm] "+r"(aIm), |
433 [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), | 512 [bRe] "+r"(bRe), [bIm] "+r"(bIm), [yf0] "+r"(yf0), [yf1] "+r"(yf1), |
434 [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), | 513 [len] "+r"(len) |
435 [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) | 514 : |
436 : | 515 : "memory"); |
437 : "memory" | |
438 ); | |
439 } | 516 } |
440 } | 517 } |
441 | 518 |
442 void WebRtcAec_FilterAdaptation_mips( | 519 void WebRtcAec_FilterAdaptation_mips( |
443 int num_partitions, | 520 int num_partitions, |
444 int x_fft_buf_block_pos, | 521 int x_fft_buf_block_pos, |
445 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | 522 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], |
446 float e_fft[2][PART_LEN1], | 523 float e_fft[2][PART_LEN1], |
447 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { | 524 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { |
448 float fft[PART_LEN2]; | 525 float fft[PART_LEN2]; |
449 int i; | 526 int i; |
450 for (i = 0; i < num_partitions; i++) { | 527 for (i = 0; i < num_partitions; i++) { |
451 int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1); | 528 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); |
452 int pos; | 529 int pos; |
453 // Check for wrap | 530 // Check for wrap |
454 if (i + x_fft_buf_block_pos >= num_partitions) { | 531 if (i + x_fft_buf_block_pos >= num_partitions) { |
455 xPos -= num_partitions * PART_LEN1; | 532 xPos -= num_partitions * PART_LEN1; |
456 } | 533 } |
457 | 534 |
458 pos = i * PART_LEN1; | 535 pos = i * PART_LEN1; |
459 float* aRe = x_fft_buf[0] + xPos; | 536 float* aRe = x_fft_buf[0] + xPos; |
460 float* aIm = x_fft_buf[1] + xPos; | 537 float* aIm = x_fft_buf[1] + xPos; |
461 float* bRe = e_fft[0]; | 538 float* bRe = e_fft[0]; |
462 float* bIm = e_fft[1]; | 539 float* bIm = e_fft[1]; |
463 float* fft_tmp; | 540 float* fft_tmp; |
464 | 541 |
465 float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; | 542 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12; |
466 int len = PART_LEN >> 1; | 543 int len = PART_LEN >> 1; |
467 | 544 |
468 __asm __volatile ( | 545 __asm __volatile( |
469 ".set push \n\t" | 546 ".set push \n\t" |
470 ".set noreorder \n\t" | 547 ".set noreorder \n\t" |
471 "addiu %[fft_tmp], %[fft], 0 \n\t" | 548 "addiu %[fft_tmp], %[fft], 0 \n\t" |
472 "1: \n\t" | 549 "1: \n\t" |
473 "lwc1 %[f0], 0(%[aRe]) \n\t" | 550 "lwc1 %[f0], 0(%[aRe]) \n\t" |
474 "lwc1 %[f1], 0(%[bRe]) \n\t" | 551 "lwc1 %[f1], 0(%[bRe]) \n\t" |
475 "lwc1 %[f2], 0(%[bIm]) \n\t" | 552 "lwc1 %[f2], 0(%[bIm]) \n\t" |
476 "lwc1 %[f4], 4(%[aRe]) \n\t" | 553 "lwc1 %[f4], 4(%[aRe]) \n\t" |
477 "lwc1 %[f5], 4(%[bRe]) \n\t" | 554 "lwc1 %[f5], 4(%[bRe]) \n\t" |
478 "lwc1 %[f6], 4(%[bIm]) \n\t" | 555 "lwc1 %[f6], 4(%[bIm]) \n\t" |
479 "addiu %[aRe], %[aRe], 8 \n\t" | 556 "addiu %[aRe], %[aRe], 8 \n\t" |
480 "addiu %[bRe], %[bRe], 8 \n\t" | 557 "addiu %[bRe], %[bRe], 8 \n\t" |
481 "mul.s %[f8], %[f0], %[f1] \n\t" | 558 "mul.s %[f8], %[f0], %[f1] \n\t" |
482 "mul.s %[f0], %[f0], %[f2] \n\t" | 559 "mul.s %[f0], %[f0], %[f2] \n\t" |
483 "lwc1 %[f3], 0(%[aIm]) \n\t" | 560 "lwc1 %[f3], 0(%[aIm]) \n\t" |
484 "mul.s %[f9], %[f4], %[f5] \n\t" | 561 "mul.s %[f9], %[f4], %[f5] \n\t" |
485 "lwc1 %[f7], 4(%[aIm]) \n\t" | 562 "lwc1 %[f7], 4(%[aIm]) \n\t" |
486 "mul.s %[f4], %[f4], %[f6] \n\t" | 563 "mul.s %[f4], %[f4], %[f6] \n\t" |
487 #if !defined(MIPS32_R2_LE) | 564 #if !defined(MIPS32_R2_LE) |
488 "mul.s %[f10], %[f3], %[f2] \n\t" | 565 "mul.s %[f10], %[f3], %[f2] \n\t" |
489 "mul.s %[f1], %[f3], %[f1] \n\t" | 566 "mul.s %[f1], %[f3], %[f1] \n\t" |
490 "mul.s %[f11], %[f7], %[f6] \n\t" | 567 "mul.s %[f11], %[f7], %[f6] \n\t" |
491 "mul.s %[f5], %[f7], %[f5] \n\t" | 568 "mul.s %[f5], %[f7], %[f5] \n\t" |
492 "addiu %[aIm], %[aIm], 8 \n\t" | 569 "addiu %[aIm], %[aIm], 8 \n\t" |
493 "addiu %[bIm], %[bIm], 8 \n\t" | 570 "addiu %[bIm], %[bIm], 8 \n\t" |
494 "addiu %[len], %[len], -1 \n\t" | 571 "addiu %[len], %[len], -1 \n\t" |
495 "add.s %[f8], %[f8], %[f10] \n\t" | 572 "add.s %[f8], %[f8], %[f10] \n\t" |
496 "sub.s %[f1], %[f0], %[f1] \n\t" | 573 "sub.s %[f1], %[f0], %[f1] \n\t" |
497 "add.s %[f9], %[f9], %[f11] \n\t" | 574 "add.s %[f9], %[f9], %[f11] \n\t" |
498 "sub.s %[f5], %[f4], %[f5] \n\t" | 575 "sub.s %[f5], %[f4], %[f5] \n\t" |
499 #else // #if !defined(MIPS32_R2_LE) | 576 #else // #if !defined(MIPS32_R2_LE) |
500 "addiu %[aIm], %[aIm], 8 \n\t" | 577 "addiu %[aIm], %[aIm], 8 \n\t" |
501 "addiu %[bIm], %[bIm], 8 \n\t" | 578 "addiu %[bIm], %[bIm], 8 \n\t" |
502 "addiu %[len], %[len], -1 \n\t" | 579 "addiu %[len], %[len], -1 \n\t" |
503 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" | 580 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" |
504 "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" | 581 "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" |
505 "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" | 582 "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" |
506 "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" | 583 "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" |
507 #endif // #if !defined(MIPS32_R2_LE) | 584 #endif // #if !defined(MIPS32_R2_LE) |
508 "swc1 %[f8], 0(%[fft_tmp]) \n\t" | 585 "swc1 %[f8], 0(%[fft_tmp]) \n\t" |
509 "swc1 %[f1], 4(%[fft_tmp]) \n\t" | 586 "swc1 %[f1], 4(%[fft_tmp]) \n\t" |
510 "swc1 %[f9], 8(%[fft_tmp]) \n\t" | 587 "swc1 %[f9], 8(%[fft_tmp]) \n\t" |
511 "swc1 %[f5], 12(%[fft_tmp]) \n\t" | 588 "swc1 %[f5], 12(%[fft_tmp]) \n\t" |
512 "bgtz %[len], 1b \n\t" | 589 "bgtz %[len], 1b \n\t" |
513 " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | 590 " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" |
514 "lwc1 %[f0], 0(%[aRe]) \n\t" | 591 "lwc1 %[f0], 0(%[aRe]) \n\t" |
515 "lwc1 %[f1], 0(%[bRe]) \n\t" | 592 "lwc1 %[f1], 0(%[bRe]) \n\t" |
516 "lwc1 %[f2], 0(%[bIm]) \n\t" | 593 "lwc1 %[f2], 0(%[bIm]) \n\t" |
517 "lwc1 %[f3], 0(%[aIm]) \n\t" | 594 "lwc1 %[f3], 0(%[aIm]) \n\t" |
518 "mul.s %[f8], %[f0], %[f1] \n\t" | 595 "mul.s %[f8], %[f0], %[f1] \n\t" |
519 #if !defined(MIPS32_R2_LE) | 596 #if !defined(MIPS32_R2_LE) |
520 "mul.s %[f10], %[f3], %[f2] \n\t" | 597 "mul.s %[f10], %[f3], %[f2] \n\t" |
521 "add.s %[f8], %[f8], %[f10] \n\t" | 598 "add.s %[f8], %[f8], %[f10] \n\t" |
522 #else // #if !defined(MIPS32_R2_LE) | 599 #else // #if !defined(MIPS32_R2_LE) |
523 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" | 600 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" |
524 #endif // #if !defined(MIPS32_R2_LE) | 601 #endif // #if !defined(MIPS32_R2_LE) |
525 "swc1 %[f8], 4(%[fft]) \n\t" | 602 "swc1 %[f8], 4(%[fft]) \n\t" |
526 ".set pop \n\t" | 603 ".set pop \n\t" |
527 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | 604 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), |
528 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | 605 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), |
529 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), | 606 [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11), |
530 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), | 607 [f12] "=&f"(f12), [aRe] "+r"(aRe), [aIm] "+r"(aIm), [bRe] "+r"(bRe), |
531 [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), | 608 [bIm] "+r"(bIm), [fft_tmp] "=&r"(fft_tmp), [len] "+r"(len) |
532 [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), | 609 : [fft] "r"(fft) |
533 [len] "+r" (len) | 610 : "memory"); |
534 : [fft] "r" (fft) | |
535 : "memory" | |
536 ); | |
537 | 611 |
538 aec_rdft_inverse_128(fft); | 612 aec_rdft_inverse_128(fft); |
539 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); | 613 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); |
540 | 614 |
541 // fft scaling | 615 // fft scaling |
542 { | 616 { |
543 float scale = 2.0f / PART_LEN2; | 617 float scale = 2.0f / PART_LEN2; |
544 __asm __volatile ( | 618 __asm __volatile( |
545 ".set push \n\t" | 619 ".set push \n\t" |
546 ".set noreorder \n\t" | 620 ".set noreorder \n\t" |
547 "addiu %[fft_tmp], %[fft], 0 \n\t" | 621 "addiu %[fft_tmp], %[fft], 0 \n\t" |
548 "addiu %[len], $zero, 8 \n\t" | 622 "addiu %[len], $zero, 8 \n\t" |
549 "1: \n\t" | 623 "1: \n\t" |
550 "addiu %[len], %[len], -1 \n\t" | 624 "addiu %[len], %[len], -1 \n\t" |
551 "lwc1 %[f0], 0(%[fft_tmp]) \n\t" | 625 "lwc1 %[f0], 0(%[fft_tmp]) \n\t" |
552 "lwc1 %[f1], 4(%[fft_tmp]) \n\t" | 626 "lwc1 %[f1], 4(%[fft_tmp]) \n\t" |
553 "lwc1 %[f2], 8(%[fft_tmp]) \n\t" | 627 "lwc1 %[f2], 8(%[fft_tmp]) \n\t" |
554 "lwc1 %[f3], 12(%[fft_tmp]) \n\t" | 628 "lwc1 %[f3], 12(%[fft_tmp]) \n\t" |
555 "mul.s %[f0], %[f0], %[scale] \n\t" | 629 "mul.s %[f0], %[f0], %[scale] \n\t" |
556 "mul.s %[f1], %[f1], %[scale] \n\t" | 630 "mul.s %[f1], %[f1], %[scale] \n\t" |
557 "mul.s %[f2], %[f2], %[scale] \n\t" | 631 "mul.s %[f2], %[f2], %[scale] \n\t" |
558 "mul.s %[f3], %[f3], %[scale] \n\t" | 632 "mul.s %[f3], %[f3], %[scale] \n\t" |
559 "lwc1 %[f4], 16(%[fft_tmp]) \n\t" | 633 "lwc1 %[f4], 16(%[fft_tmp]) \n\t" |
560 "lwc1 %[f5], 20(%[fft_tmp]) \n\t" | 634 "lwc1 %[f5], 20(%[fft_tmp]) \n\t" |
561 "lwc1 %[f6], 24(%[fft_tmp]) \n\t" | 635 "lwc1 %[f6], 24(%[fft_tmp]) \n\t" |
562 "lwc1 %[f7], 28(%[fft_tmp]) \n\t" | 636 "lwc1 %[f7], 28(%[fft_tmp]) \n\t" |
563 "mul.s %[f4], %[f4], %[scale] \n\t" | 637 "mul.s %[f4], %[f4], %[scale] \n\t" |
564 "mul.s %[f5], %[f5], %[scale] \n\t" | 638 "mul.s %[f5], %[f5], %[scale] \n\t" |
565 "mul.s %[f6], %[f6], %[scale] \n\t" | 639 "mul.s %[f6], %[f6], %[scale] \n\t" |
566 "mul.s %[f7], %[f7], %[scale] \n\t" | 640 "mul.s %[f7], %[f7], %[scale] \n\t" |
567 "swc1 %[f0], 0(%[fft_tmp]) \n\t" | 641 "swc1 %[f0], 0(%[fft_tmp]) \n\t" |
568 "swc1 %[f1], 4(%[fft_tmp]) \n\t" | 642 "swc1 %[f1], 4(%[fft_tmp]) \n\t" |
569 "swc1 %[f2], 8(%[fft_tmp]) \n\t" | 643 "swc1 %[f2], 8(%[fft_tmp]) \n\t" |
570 "swc1 %[f3], 12(%[fft_tmp]) \n\t" | 644 "swc1 %[f3], 12(%[fft_tmp]) \n\t" |
571 "swc1 %[f4], 16(%[fft_tmp]) \n\t" | 645 "swc1 %[f4], 16(%[fft_tmp]) \n\t" |
572 "swc1 %[f5], 20(%[fft_tmp]) \n\t" | 646 "swc1 %[f5], 20(%[fft_tmp]) \n\t" |
573 "swc1 %[f6], 24(%[fft_tmp]) \n\t" | 647 "swc1 %[f6], 24(%[fft_tmp]) \n\t" |
574 "swc1 %[f7], 28(%[fft_tmp]) \n\t" | 648 "swc1 %[f7], 28(%[fft_tmp]) \n\t" |
575 "bgtz %[len], 1b \n\t" | 649 "bgtz %[len], 1b \n\t" |
576 " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" | 650 " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" |
577 ".set pop \n\t" | 651 ".set pop \n\t" |
578 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | 652 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), |
579 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | 653 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), |
580 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), | 654 [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp) |
581 [fft_tmp] "=&r" (fft_tmp) | 655 : [scale] "f"(scale), [fft] "r"(fft) |
582 : [scale] "f" (scale), [fft] "r" (fft) | 656 : "memory"); |
583 : "memory" | |
584 ); | |
585 } | 657 } |
586 aec_rdft_forward_128(fft); | 658 aec_rdft_forward_128(fft); |
587 aRe = h_fft_buf[0] + pos; | 659 aRe = h_fft_buf[0] + pos; |
588 aIm = h_fft_buf[1] + pos; | 660 aIm = h_fft_buf[1] + pos; |
589 __asm __volatile ( | 661 __asm __volatile( |
590 ".set push \n\t" | 662 ".set push \n\t" |
591 ".set noreorder \n\t" | 663 ".set noreorder \n\t" |
592 "addiu %[fft_tmp], %[fft], 0 \n\t" | 664 "addiu %[fft_tmp], %[fft], 0 \n\t" |
593 "addiu %[len], $zero, 31 \n\t" | 665 "addiu %[len], $zero, 31 \n\t" |
594 "lwc1 %[f0], 0(%[aRe]) \n\t" | 666 "lwc1 %[f0], 0(%[aRe]) \n\t" |
595 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" | 667 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" |
596 "lwc1 %[f2], 256(%[aRe]) \n\t" | 668 "lwc1 %[f2], 256(%[aRe]) \n\t" |
597 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" | 669 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" |
598 "lwc1 %[f4], 4(%[aRe]) \n\t" | 670 "lwc1 %[f4], 4(%[aRe]) \n\t" |
599 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" | 671 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" |
600 "lwc1 %[f6], 4(%[aIm]) \n\t" | 672 "lwc1 %[f6], 4(%[aIm]) \n\t" |
601 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" | 673 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" |
602 "add.s %[f0], %[f0], %[f1] \n\t" | 674 "add.s %[f0], %[f0], %[f1] \n\t" |
603 "add.s %[f2], %[f2], %[f3] \n\t" | 675 "add.s %[f2], %[f2], %[f3] \n\t" |
604 "add.s %[f4], %[f4], %[f5] \n\t" | 676 "add.s %[f4], %[f4], %[f5] \n\t" |
605 "add.s %[f6], %[f6], %[f7] \n\t" | 677 "add.s %[f6], %[f6], %[f7] \n\t" |
606 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | 678 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" |
607 "swc1 %[f0], 0(%[aRe]) \n\t" | 679 "swc1 %[f0], 0(%[aRe]) \n\t" |
608 "swc1 %[f2], 256(%[aRe]) \n\t" | 680 "swc1 %[f2], 256(%[aRe]) \n\t" |
609 "swc1 %[f4], 4(%[aRe]) \n\t" | 681 "swc1 %[f4], 4(%[aRe]) \n\t" |
610 "addiu %[aRe], %[aRe], 8 \n\t" | 682 "addiu %[aRe], %[aRe], 8 \n\t" |
611 "swc1 %[f6], 4(%[aIm]) \n\t" | 683 "swc1 %[f6], 4(%[aIm]) \n\t" |
612 "addiu %[aIm], %[aIm], 8 \n\t" | 684 "addiu %[aIm], %[aIm], 8 \n\t" |
613 "1: \n\t" | 685 "1: \n\t" |
614 "lwc1 %[f0], 0(%[aRe]) \n\t" | 686 "lwc1 %[f0], 0(%[aRe]) \n\t" |
615 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" | 687 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" |
616 "lwc1 %[f2], 0(%[aIm]) \n\t" | 688 "lwc1 %[f2], 0(%[aIm]) \n\t" |
617 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" | 689 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" |
618 "lwc1 %[f4], 4(%[aRe]) \n\t" | 690 "lwc1 %[f4], 4(%[aRe]) \n\t" |
619 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" | 691 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" |
620 "lwc1 %[f6], 4(%[aIm]) \n\t" | 692 "lwc1 %[f6], 4(%[aIm]) \n\t" |
621 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" | 693 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" |
622 "add.s %[f0], %[f0], %[f1] \n\t" | 694 "add.s %[f0], %[f0], %[f1] \n\t" |
623 "add.s %[f2], %[f2], %[f3] \n\t" | 695 "add.s %[f2], %[f2], %[f3] \n\t" |
624 "add.s %[f4], %[f4], %[f5] \n\t" | 696 "add.s %[f4], %[f4], %[f5] \n\t" |
625 "add.s %[f6], %[f6], %[f7] \n\t" | 697 "add.s %[f6], %[f6], %[f7] \n\t" |
626 "addiu %[len], %[len], -1 \n\t" | 698 "addiu %[len], %[len], -1 \n\t" |
627 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | 699 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" |
628 "swc1 %[f0], 0(%[aRe]) \n\t" | 700 "swc1 %[f0], 0(%[aRe]) \n\t" |
629 "swc1 %[f2], 0(%[aIm]) \n\t" | 701 "swc1 %[f2], 0(%[aIm]) \n\t" |
630 "swc1 %[f4], 4(%[aRe]) \n\t" | 702 "swc1 %[f4], 4(%[aRe]) \n\t" |
631 "addiu %[aRe], %[aRe], 8 \n\t" | 703 "addiu %[aRe], %[aRe], 8 \n\t" |
632 "swc1 %[f6], 4(%[aIm]) \n\t" | 704 "swc1 %[f6], 4(%[aIm]) \n\t" |
633 "bgtz %[len], 1b \n\t" | 705 "bgtz %[len], 1b \n\t" |
634 " addiu %[aIm], %[aIm], 8 \n\t" | 706 " addiu %[aIm], %[aIm], 8 \n\t" |
635 ".set pop \n\t" | 707 ".set pop \n\t" |
636 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | 708 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), |
637 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | 709 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), |
638 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), | 710 [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp), [aRe] "+r"(aRe), |
639 [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) | 711 [aIm] "+r"(aIm) |
640 : [fft] "r" (fft) | 712 : [fft] "r"(fft) |
641 : "memory" | 713 : "memory"); |
642 ); | |
643 } | 714 } |
644 } | 715 } |
645 | 716 |
// Weights and overdrives the per-bin gains hNl, then scales the spectrum efw
// by the resulting gains (MIPS inline-assembly implementation).
//
// For each of the PART_LEN1 bins i, with wC = WebRtcAec_weightCurve:
//   1. if (hNl[i] > hNlFb)
//        hNl[i] = wC[i] * hNlFb + (1 - wC[i]) * hNl[i];
//   2. hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
//   3. efw[0][i] = efw[0][i] * hNl[i];
//      efw[1][i] = -(efw[1][i] * hNl[i]);
//
// Parameters:
//   aec   - only aec->overDriveSm is read by this function.
//   hNl   - per-bin gains, updated in place.
//   hNlFb - comparison threshold and blend target used in step 1.
//   efw   - two planes of PART_LEN1 floats, updated in place
//           (presumably real/imaginary parts -- confirm against callers).
646 void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, | 717 void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, |
647 float hNl[PART_LEN1], | 718 float hNl[PART_LEN1], |
648 const float hNlFb, | 719 const float hNlFb, |
649 float efw[2][PART_LEN1]) { | 720 float efw[2][PART_LEN1]) { |
650 int i; | 721 int i; |
651 const float one = 1.0; | 722 const float one = 1.0; |
652 float* p_hNl; | 723 float* p_hNl; |
653 float* p_efw0; | 724 float* p_efw0; |
654 float* p_efw1; | 725 float* p_efw1; |
655 float* p_WebRtcAec_wC; | 726 float* p_WebRtcAec_wC; |
656 float temp1, temp2, temp3, temp4; | 727 float temp1, temp2, temp3, temp4; |
657 | 728 |
658 p_hNl = &hNl[0]; | 729 p_hNl = &hNl[0]; |
659 p_efw0 = &efw[0][0]; | 730 p_efw0 = &efw[0][0]; |
660 p_efw1 = &efw[1][0]; | 731 p_efw1 = &efw[1][0]; |
661 p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; | 732 p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; |
662 | 733 |
663 for (i = 0; i < PART_LEN1; i++) { | 734 for (i = 0; i < PART_LEN1; i++) {
// Step 1. The mul.s right after bc1f sits in the branch delay slot
// (.set noreorder), so temp3 = wC * hNlFb is computed whether or not the
// branch is taken. The store to hNl is skipped when hNl[i] > hNlFb is false.
// p_hNl is an input-only operand here; only the weight-curve pointer advances.
664 // Weight subbands | 735 // Weight subbands |
665 __asm __volatile ( | 736 __asm __volatile( |
666 ".set push \n\t" | 737 ".set push \n\t" |
667 ".set noreorder \n\t" | 738 ".set noreorder \n\t" |
668 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" | 739 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" |
669 "lwc1 %[temp2], 0(%[p_wC]) \n\t" | 740 "lwc1 %[temp2], 0(%[p_wC]) \n\t" |
670 "c.lt.s %[hNlFb], %[temp1] \n\t" | 741 "c.lt.s %[hNlFb], %[temp1] \n\t" |
671 "bc1f 1f \n\t" | 742 "bc1f 1f \n\t" |
672 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" | 743 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" |
673 "sub.s %[temp4], %[one], %[temp2] \n\t" | 744 "sub.s %[temp4], %[one], %[temp2] \n\t" |
674 #if !defined(MIPS32_R2_LE) | 745 #if !defined(MIPS32_R2_LE) |
675 "mul.s %[temp1], %[temp1], %[temp4] \n\t" | 746 "mul.s %[temp1], %[temp1], %[temp4] \n\t" |
676 "add.s %[temp1], %[temp3], %[temp1] \n\t" | 747 "add.s %[temp1], %[temp3], %[temp1] \n\t" |
677 #else // #if !defined(MIPS32_R2_LE) | 748 #else // #if !defined(MIPS32_R2_LE) |
678 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" | 749 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" |
679 #endif // #if !defined(MIPS32_R2_LE) | 750 #endif // #if !defined(MIPS32_R2_LE) |
680 "swc1 %[temp1], 0(%[p_hNl]) \n\t" | 751 "swc1 %[temp1], 0(%[p_hNl]) \n\t" |
681 "1: \n\t" | 752 "1: \n\t" |
682 "addiu %[p_wC], %[p_wC], 4 \n\t" | 753 "addiu %[p_wC], %[p_wC], 4 \n\t" |
683 ".set pop \n\t" | 754 ".set pop \n\t" |
684 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), | 755 : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3), |
685 [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) | 756 [temp4] "=&f"(temp4), [p_wC] "+r"(p_WebRtcAec_wC) |
686 : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) | 757 : [hNlFb] "f"(hNlFb), [one] "f"(one), [p_hNl] "r"(p_hNl) |
687 : "memory" | 758 : "memory"); |
688 ); | |
689 | 759 |
// Step 2 is done in C, between the two asm statements.
690 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | 760 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); |
691 | 761 |
// Step 3. Reloads the gain just written by powf() through p_hNl, then
// advances p_hNl, p_efw0 and p_efw1 by one float. The two stores use a -4
// offset because their pointers have already been incremented.
692 __asm __volatile ( | 762 __asm __volatile( |
693 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" | 763 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" |
694 "lwc1 %[temp3], 0(%[p_efw1]) \n\t" | 764 "lwc1 %[temp3], 0(%[p_efw1]) \n\t" |
695 "lwc1 %[temp2], 0(%[p_efw0]) \n\t" | 765 "lwc1 %[temp2], 0(%[p_efw0]) \n\t" |
696 "addiu %[p_hNl], %[p_hNl], 4 \n\t" | 766 "addiu %[p_hNl], %[p_hNl], 4 \n\t" |
697 "mul.s %[temp3], %[temp3], %[temp1] \n\t" | 767 "mul.s %[temp3], %[temp3], %[temp1] \n\t" |
698 "mul.s %[temp2], %[temp2], %[temp1] \n\t" | 768 "mul.s %[temp2], %[temp2], %[temp1] \n\t" |
699 "addiu %[p_efw0], %[p_efw0], 4 \n\t" | 769 "addiu %[p_efw0], %[p_efw0], 4 \n\t" |
700 "addiu %[p_efw1], %[p_efw1], 4 \n\t" | 770 "addiu %[p_efw1], %[p_efw1], 4 \n\t" |
701 "neg.s %[temp4], %[temp3] \n\t" | 771 "neg.s %[temp4], %[temp3] \n\t" |
702 "swc1 %[temp2], -4(%[p_efw0]) \n\t" | 772 "swc1 %[temp2], -4(%[p_efw0]) \n\t" |
703 "swc1 %[temp4], -4(%[p_efw1]) \n\t" | 773 "swc1 %[temp4], -4(%[p_efw1]) \n\t" |
704 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), | 774 : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3), |
705 [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), | 775 [temp4] "=&f"(temp4), [p_efw0] "+r"(p_efw0), [p_efw1] "+r"(p_efw1), |
706 [p_hNl] "+r" (p_hNl) | 776 [p_hNl] "+r"(p_hNl) |
707 : | 777 : |
708 : "memory" | 778 : "memory"); |
709 ); | |
710 } | 779 } |
711 } | 780 } |
712 | 781 |
// Normalizes ef by x_pow, limits its magnitude to error_threshold, and
// scales it by the step size mu (MIPS inline-assembly implementation).
//
// mu and error_threshold are kExtendedMu / kExtendedErrorThreshold when
// extended_filter_enabled is non-zero, otherwise normal_mu /
// normal_error_threshold.
//
// For each of the PART_LEN1 bins i:
//   d  = x_pow[i] + 1e-10f;
//   a  = ef[0][i] / d;   b = ef[1][i] / d;
//   m2 = a * a + b * b;
//   if (!(m2 <= error_threshold * error_threshold)) {
//     s = error_threshold / (sqrt(m2) + 1e-10f);
//     a *= s;   b *= s;
//   }
//   ef[0][i] = a * mu;   ef[1][i] = b * mu;
//
// Parameters:
//   x_pow - PART_LEN1 floats, read only (the local pointer is advanced).
//   ef    - two planes of PART_LEN1 floats, updated in place.
713 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, | 783 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, |
714 float normal_mu, | 784 float normal_mu, |
715 float normal_error_threshold, | 785 float normal_error_threshold, |
716 float x_pow[PART_LEN1], | 786 float x_pow[PART_LEN1], |
717 float ef[2][PART_LEN1]) { | 787 float ef[2][PART_LEN1]) { |
718 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; | 788 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; |
719 const float error_threshold = extended_filter_enabled | 789 const float error_threshold = extended_filter_enabled |
720 ? kExtendedErrorThreshold | 790 ? kExtendedErrorThreshold |
721 : normal_error_threshold; | 791 : normal_error_threshold; |
722 int len = (PART_LEN1); | 792 int len = (PART_LEN1); |
723 float* ef0 = ef[0]; | 793 float* ef0 = ef[0]; |
724 float* ef1 = ef[1]; | 794 float* ef1 = ef[1]; |
725 float fac1 = 1e-10f; | 795 float fac1 = 1e-10f; |
726 float err_th2 = error_threshold * error_threshold; | 796 float err_th2 = error_threshold * error_threshold; |
727 float f0, f1, f2; | 797 float f0, f1, f2; |
// f3 is scratch for the non-R2 path only; the MIPS32_R2_LE path uses madd.s
// for the multiply-add and therefore does not declare or bind it.
728 #if !defined(MIPS32_R2_LE) | 797 #if !defined(MIPS32_R2_LE) |
729 float f3; | 798 float f3; |
730 #endif | 799 #endif |
731 | 800 |
// Single asm loop over all PART_LEN1 bins. Label 1 is the loop head; label 2
// is where the magnitude-limiting code is bypassed when m2 <= err_th2. The
// final ef1 increment sits in the delay slot of the loop-back bgtz.
// NOTE(review): the nop between c.le.s and bc1t presumably covers the
// FP-compare-to-branch hazard on older MIPS cores -- confirm before removing.
732 __asm __volatile ( | 801 __asm __volatile( |
733 ".set push \n\t" | 802 ".set push \n\t" |
734 ".set noreorder \n\t" | 803 ".set noreorder \n\t" |
735 "1: \n\t" | 804 "1: \n\t" |
736 "lwc1 %[f0], 0(%[x_pow]) \n\t" | 805 "lwc1 %[f0], 0(%[x_pow]) \n\t" |
737 "lwc1 %[f1], 0(%[ef0]) \n\t" | 806 "lwc1 %[f1], 0(%[ef0]) \n\t" |
738 "lwc1 %[f2], 0(%[ef1]) \n\t" | 807 "lwc1 %[f2], 0(%[ef1]) \n\t" |
739 "add.s %[f0], %[f0], %[fac1] \n\t" | 808 "add.s %[f0], %[f0], %[fac1] \n\t" |
740 "div.s %[f1], %[f1], %[f0] \n\t" | 809 "div.s %[f1], %[f1], %[f0] \n\t" |
741 "div.s %[f2], %[f2], %[f0] \n\t" | 810 "div.s %[f2], %[f2], %[f0] \n\t" |
742 "mul.s %[f0], %[f1], %[f1] \n\t" | 811 "mul.s %[f0], %[f1], %[f1] \n\t" |
743 #if defined(MIPS32_R2_LE) | 812 #if defined(MIPS32_R2_LE) |
744 "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" | 813 "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" |
745 #else | 814 #else |
746 "mul.s %[f3], %[f2], %[f2] \n\t" | 815 "mul.s %[f3], %[f2], %[f2] \n\t" |
747 "add.s %[f0], %[f0], %[f3] \n\t" | 816 "add.s %[f0], %[f0], %[f3] \n\t" |
748 #endif | 817 #endif |
749 "c.le.s %[f0], %[err_th2] \n\t" | 818 "c.le.s %[f0], %[err_th2] \n\t" |
750 "nop \n\t" | 819 "nop \n\t" |
751 "bc1t 2f \n\t" | 820 "bc1t 2f \n\t" |
752 " nop \n\t" | 821 " nop \n\t" |
753 "sqrt.s %[f0], %[f0] \n\t" | 822 "sqrt.s %[f0], %[f0] \n\t" |
754 "add.s %[f0], %[f0], %[fac1] \n\t" | 823 "add.s %[f0], %[f0], %[fac1] \n\t" |
755 "div.s %[f0], %[err_th], %[f0] \n\t" | 824 "div.s %[f0], %[err_th], %[f0] \n\t" |
756 "mul.s %[f1], %[f1], %[f0] \n\t" | 825 "mul.s %[f1], %[f1], %[f0] \n\t" |
757 "mul.s %[f2], %[f2], %[f0] \n\t" | 826 "mul.s %[f2], %[f2], %[f0] \n\t" |
758 "2: \n\t" | 827 "2: \n\t" |
759 "mul.s %[f1], %[f1], %[mu] \n\t" | 828 "mul.s %[f1], %[f1], %[mu] \n\t" |
760 "mul.s %[f2], %[f2], %[mu] \n\t" | 829 "mul.s %[f2], %[f2], %[mu] \n\t" |
761 "swc1 %[f1], 0(%[ef0]) \n\t" | 830 "swc1 %[f1], 0(%[ef0]) \n\t" |
762 "swc1 %[f2], 0(%[ef1]) \n\t" | 831 "swc1 %[f2], 0(%[ef1]) \n\t" |
763 "addiu %[len], %[len], -1 \n\t" | 832 "addiu %[len], %[len], -1 \n\t" |
764 "addiu %[x_pow], %[x_pow], 4 \n\t" | 833 "addiu %[x_pow], %[x_pow], 4 \n\t" |
765 "addiu %[ef0], %[ef0], 4 \n\t" | 834 "addiu %[ef0], %[ef0], 4 \n\t" |
766 "bgtz %[len], 1b \n\t" | 835 "bgtz %[len], 1b \n\t" |
767 " addiu %[ef1], %[ef1], 4 \n\t" | 836 " addiu %[ef1], %[ef1], 4 \n\t" |
768 ".set pop \n\t" | 837 ".set pop \n\t" |
769 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | 838 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), |
770 #if !defined(MIPS32_R2_LE) | 839 #if !defined(MIPS32_R2_LE) |
771 [f3] "=&f" (f3), | 840 [f3] "=&f"(f3), |
772 #endif | 841 #endif |
773 [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), | 842 [x_pow] "+r"(x_pow), [ef0] "+r"(ef0), [ef1] "+r"(ef1), [len] "+r"(len) |
774 [len] "+r" (len) | 843 : [fac1] "f"(fac1), [err_th2] "f"(err_th2), [mu] "f"(mu), |
775 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), | 844 [err_th] "f"(error_threshold) |
776 [err_th] "f" (error_threshold) | 845 : "memory"); |
777 : "memory" | |
778 ); | |
779 } | 846 } |
780 | 847 |
// Points each of the five WebRtcAec_* hooks assigned below (FilterFar,
// FilterAdaptation, ScaleErrorSignal, ComfortNoise, OverdriveAndSuppress) at
// its *_mips implementation. Takes no arguments and returns nothing.
// NOTE(review): presumably called once during AEC setup on MIPS builds --
// confirm at the call site.
781 void WebRtcAec_InitAec_mips(void) { | 848 void WebRtcAec_InitAec_mips(void) { |
782 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; | 849 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; |
783 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; | 850 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; |
784 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; | 851 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; |
785 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; | 852 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; |
786 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; | 853 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; |
787 } | 854 } |
OLD | NEW |