OLD | NEW |
| (Empty) |
/*
 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
 * Copyright Takuya OOURA, 1996-2001
 *
 * You may use, copy, modify and distribute this code for any purpose (include
 * commercial use) and without fee. Please refer to this package when you modify
 * this code.
 *
 * Changes by the WebRTC authors:
 *    - Trivial type modifications.
 *    - Minimal code subset to do rdft of length 128.
 *    - Optimizations because of known length.
 *
 *  All changes are covered by the WebRTC license and IP grant:
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
21 | |
22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" | |
23 | |
24 #include <math.h> | |
25 | |
26 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" | |
27 #include "webrtc/typedefs.h" | |
28 | |
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
// to see the initialization code.
// Precomputed weight table for the 128-point transform.
//  - rdft_w[0..31]: consecutive (real, imaginary) twiddle pairs; the butterfly
//    stages in this file read them as rdft_w[k] / rdft_w[k + 1].
//  - rdft_w[32..63]: table used by rftfsub_128_C / rftbsub_128_C through the
//    pointer `rdft_w + 32`.
const float rdft_w[64] = {
    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
// (real, imaginary) pairs of the third twiddle factor (wk3r, wk3i) used for
// the first half of each butterfly group in cft1st_128_C and cftmdl_128_C.
// Indexed in step with rdft_w (same k1 index).
const float rdft_wk3ri_first[16] = {
    1.000000000f,  0.000000000f, 0.382683456f,  0.923879564f,
    0.831469536f,  0.555570245f, -0.195090353f, 0.980785251f,
    0.956940353f,  0.290284693f, 0.098017156f,  0.995184720f,
    0.634393334f,  0.773010492f, -0.471396863f, 0.881921172f,
};
// (real, imaginary) pairs of the third twiddle factor (wk3r, wk3i) used for
// the second half of each butterfly group in cft1st_128_C and cftmdl_128_C.
// Indexed in step with rdft_w (same k1 index).
const float rdft_wk3ri_second[16] = {
    -0.707106769f, 0.707106769f,  -0.923879564f, -0.382683456f,
    -0.980785251f, 0.195090353f,  -0.555570245f, -0.831469536f,
    -0.881921172f, 0.471396863f,  -0.773010492f, -0.634393334f,
    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
62 ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { | |
63 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, | |
64 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f, | |
65 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f, | |
66 0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f, | |
67 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, | |
68 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, | |
69 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f, | |
70 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f, | |
71 }; | |
72 ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { | |
73 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, | |
74 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f, | |
75 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f, | |
76 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, | |
77 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, | |
78 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, | |
79 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f, | |
80 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f, | |
81 }; | |
82 ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { | |
83 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, | |
84 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, | |
85 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f, | |
86 -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f, | |
87 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, | |
88 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, | |
89 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f, | |
90 -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f, | |
91 }; | |
92 ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { | |
93 -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, | |
94 -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, | |
95 -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f, | |
96 -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f, | |
97 -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, | |
98 -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, | |
99 -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f, | |
100 -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f, | |
101 }; | |
102 ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { | |
103 -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, | |
104 -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f, | |
105 -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, | |
106 -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f, | |
107 -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, | |
108 -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, | |
109 -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f, | |
110 -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f, | |
111 }; | |
112 ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { | |
113 -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, | |
114 -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f, | |
115 -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f, | |
116 -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f, | |
117 -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, | |
118 -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, | |
119 -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f, | |
120 -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f, | |
121 }; | |
122 ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { | |
123 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, | |
124 }; | |
125 | |
// Reorders, in place, the 64 interleaved complex values (re, im) stored in
// a[0..127] into bit-reversed index order: the complex value at index c is
// exchanged with the one at the 6-bit bit-reversal of c.
//
// a: buffer of at least 128 floats; modified in place.
static void bitrv2_128_C(float* a) {
  /*
      Following things have been attempted but are no faster:
      (a) Storing the swap indexes in a LUT (index calculations are done
          for 'free' while waiting on memory/L1).
      (b) Consolidate the load/store of two consecutive floats by a 64 bit
          integer (execution is memory/L1 bound).
      (c) Do a mix of floats and 64 bit integer to maximize register
          utilization (execution is memory/L1 bound).
      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
      (e) Hard-coding of the offsets to completely eliminate index
          calculations.
  */

  unsigned int j, j1, k, k1;
  float xr, xi, yr, yi;

  // Float offsets contributed by the two low index bits after reversal.
  static const int ip[4] = {0, 64, 32, 96};
  for (k = 0; k < 4; k++) {
    // Four swaps for every (j, k) pair with j < k.
    for (j = 0; j < k; j++) {
      j1 = 2 * j + ip[k];
      k1 = 2 * k + ip[j];
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 -= 8;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
    }
    // One additional swap on the j == k diagonal.
    j1 = 2 * k + 8 + ip[k];
    k1 = j1 + 8;
    xr = a[j1 + 0];
    xi = a[j1 + 1];
    yr = a[k1 + 0];
    yi = a[k1 + 1];
    a[j1 + 0] = yr;
    a[j1 + 1] = yi;
    a[k1 + 0] = xr;
    a[k1 + 1] = xi;
  }
}
199 | |
200 static void cft1st_128_C(float* a) { | |
201 const int n = 128; | |
202 int j, k1, k2; | |
203 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; | |
204 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; | |
205 | |
206 // The processing of the first set of elements was simplified in C to avoid | |
207 // some operations (multiplication by zero or one, addition of two elements | |
208 // multiplied by the same weight, ...). | |
209 x0r = a[0] + a[2]; | |
210 x0i = a[1] + a[3]; | |
211 x1r = a[0] - a[2]; | |
212 x1i = a[1] - a[3]; | |
213 x2r = a[4] + a[6]; | |
214 x2i = a[5] + a[7]; | |
215 x3r = a[4] - a[6]; | |
216 x3i = a[5] - a[7]; | |
217 a[0] = x0r + x2r; | |
218 a[1] = x0i + x2i; | |
219 a[4] = x0r - x2r; | |
220 a[5] = x0i - x2i; | |
221 a[2] = x1r - x3i; | |
222 a[3] = x1i + x3r; | |
223 a[6] = x1r + x3i; | |
224 a[7] = x1i - x3r; | |
225 wk1r = rdft_w[2]; | |
226 x0r = a[8] + a[10]; | |
227 x0i = a[9] + a[11]; | |
228 x1r = a[8] - a[10]; | |
229 x1i = a[9] - a[11]; | |
230 x2r = a[12] + a[14]; | |
231 x2i = a[13] + a[15]; | |
232 x3r = a[12] - a[14]; | |
233 x3i = a[13] - a[15]; | |
234 a[8] = x0r + x2r; | |
235 a[9] = x0i + x2i; | |
236 a[12] = x2i - x0i; | |
237 a[13] = x0r - x2r; | |
238 x0r = x1r - x3i; | |
239 x0i = x1i + x3r; | |
240 a[10] = wk1r * (x0r - x0i); | |
241 a[11] = wk1r * (x0r + x0i); | |
242 x0r = x3i + x1r; | |
243 x0i = x3r - x1i; | |
244 a[14] = wk1r * (x0i - x0r); | |
245 a[15] = wk1r * (x0i + x0r); | |
246 k1 = 0; | |
247 for (j = 16; j < n; j += 16) { | |
248 k1 += 2; | |
249 k2 = 2 * k1; | |
250 wk2r = rdft_w[k1 + 0]; | |
251 wk2i = rdft_w[k1 + 1]; | |
252 wk1r = rdft_w[k2 + 0]; | |
253 wk1i = rdft_w[k2 + 1]; | |
254 wk3r = rdft_wk3ri_first[k1 + 0]; | |
255 wk3i = rdft_wk3ri_first[k1 + 1]; | |
256 x0r = a[j + 0] + a[j + 2]; | |
257 x0i = a[j + 1] + a[j + 3]; | |
258 x1r = a[j + 0] - a[j + 2]; | |
259 x1i = a[j + 1] - a[j + 3]; | |
260 x2r = a[j + 4] + a[j + 6]; | |
261 x2i = a[j + 5] + a[j + 7]; | |
262 x3r = a[j + 4] - a[j + 6]; | |
263 x3i = a[j + 5] - a[j + 7]; | |
264 a[j + 0] = x0r + x2r; | |
265 a[j + 1] = x0i + x2i; | |
266 x0r -= x2r; | |
267 x0i -= x2i; | |
268 a[j + 4] = wk2r * x0r - wk2i * x0i; | |
269 a[j + 5] = wk2r * x0i + wk2i * x0r; | |
270 x0r = x1r - x3i; | |
271 x0i = x1i + x3r; | |
272 a[j + 2] = wk1r * x0r - wk1i * x0i; | |
273 a[j + 3] = wk1r * x0i + wk1i * x0r; | |
274 x0r = x1r + x3i; | |
275 x0i = x1i - x3r; | |
276 a[j + 6] = wk3r * x0r - wk3i * x0i; | |
277 a[j + 7] = wk3r * x0i + wk3i * x0r; | |
278 wk1r = rdft_w[k2 + 2]; | |
279 wk1i = rdft_w[k2 + 3]; | |
280 wk3r = rdft_wk3ri_second[k1 + 0]; | |
281 wk3i = rdft_wk3ri_second[k1 + 1]; | |
282 x0r = a[j + 8] + a[j + 10]; | |
283 x0i = a[j + 9] + a[j + 11]; | |
284 x1r = a[j + 8] - a[j + 10]; | |
285 x1i = a[j + 9] - a[j + 11]; | |
286 x2r = a[j + 12] + a[j + 14]; | |
287 x2i = a[j + 13] + a[j + 15]; | |
288 x3r = a[j + 12] - a[j + 14]; | |
289 x3i = a[j + 13] - a[j + 15]; | |
290 a[j + 8] = x0r + x2r; | |
291 a[j + 9] = x0i + x2i; | |
292 x0r -= x2r; | |
293 x0i -= x2i; | |
294 a[j + 12] = -wk2i * x0r - wk2r * x0i; | |
295 a[j + 13] = -wk2i * x0i + wk2r * x0r; | |
296 x0r = x1r - x3i; | |
297 x0i = x1i + x3r; | |
298 a[j + 10] = wk1r * x0r - wk1i * x0i; | |
299 a[j + 11] = wk1r * x0i + wk1i * x0r; | |
300 x0r = x1r + x3i; | |
301 x0i = x1i - x3r; | |
302 a[j + 14] = wk3r * x0r - wk3i * x0i; | |
303 a[j + 15] = wk3r * x0i + wk3i * x0r; | |
304 } | |
305 } | |
306 | |
307 static void cftmdl_128_C(float* a) { | |
308 const int l = 8; | |
309 const int n = 128; | |
310 const int m = 32; | |
311 int j0, j1, j2, j3, k, k1, k2, m2; | |
312 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; | |
313 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; | |
314 | |
315 for (j0 = 0; j0 < l; j0 += 2) { | |
316 j1 = j0 + 8; | |
317 j2 = j0 + 16; | |
318 j3 = j0 + 24; | |
319 x0r = a[j0 + 0] + a[j1 + 0]; | |
320 x0i = a[j0 + 1] + a[j1 + 1]; | |
321 x1r = a[j0 + 0] - a[j1 + 0]; | |
322 x1i = a[j0 + 1] - a[j1 + 1]; | |
323 x2r = a[j2 + 0] + a[j3 + 0]; | |
324 x2i = a[j2 + 1] + a[j3 + 1]; | |
325 x3r = a[j2 + 0] - a[j3 + 0]; | |
326 x3i = a[j2 + 1] - a[j3 + 1]; | |
327 a[j0 + 0] = x0r + x2r; | |
328 a[j0 + 1] = x0i + x2i; | |
329 a[j2 + 0] = x0r - x2r; | |
330 a[j2 + 1] = x0i - x2i; | |
331 a[j1 + 0] = x1r - x3i; | |
332 a[j1 + 1] = x1i + x3r; | |
333 a[j3 + 0] = x1r + x3i; | |
334 a[j3 + 1] = x1i - x3r; | |
335 } | |
336 wk1r = rdft_w[2]; | |
337 for (j0 = m; j0 < l + m; j0 += 2) { | |
338 j1 = j0 + 8; | |
339 j2 = j0 + 16; | |
340 j3 = j0 + 24; | |
341 x0r = a[j0 + 0] + a[j1 + 0]; | |
342 x0i = a[j0 + 1] + a[j1 + 1]; | |
343 x1r = a[j0 + 0] - a[j1 + 0]; | |
344 x1i = a[j0 + 1] - a[j1 + 1]; | |
345 x2r = a[j2 + 0] + a[j3 + 0]; | |
346 x2i = a[j2 + 1] + a[j3 + 1]; | |
347 x3r = a[j2 + 0] - a[j3 + 0]; | |
348 x3i = a[j2 + 1] - a[j3 + 1]; | |
349 a[j0 + 0] = x0r + x2r; | |
350 a[j0 + 1] = x0i + x2i; | |
351 a[j2 + 0] = x2i - x0i; | |
352 a[j2 + 1] = x0r - x2r; | |
353 x0r = x1r - x3i; | |
354 x0i = x1i + x3r; | |
355 a[j1 + 0] = wk1r * (x0r - x0i); | |
356 a[j1 + 1] = wk1r * (x0r + x0i); | |
357 x0r = x3i + x1r; | |
358 x0i = x3r - x1i; | |
359 a[j3 + 0] = wk1r * (x0i - x0r); | |
360 a[j3 + 1] = wk1r * (x0i + x0r); | |
361 } | |
362 k1 = 0; | |
363 m2 = 2 * m; | |
364 for (k = m2; k < n; k += m2) { | |
365 k1 += 2; | |
366 k2 = 2 * k1; | |
367 wk2r = rdft_w[k1 + 0]; | |
368 wk2i = rdft_w[k1 + 1]; | |
369 wk1r = rdft_w[k2 + 0]; | |
370 wk1i = rdft_w[k2 + 1]; | |
371 wk3r = rdft_wk3ri_first[k1 + 0]; | |
372 wk3i = rdft_wk3ri_first[k1 + 1]; | |
373 for (j0 = k; j0 < l + k; j0 += 2) { | |
374 j1 = j0 + 8; | |
375 j2 = j0 + 16; | |
376 j3 = j0 + 24; | |
377 x0r = a[j0 + 0] + a[j1 + 0]; | |
378 x0i = a[j0 + 1] + a[j1 + 1]; | |
379 x1r = a[j0 + 0] - a[j1 + 0]; | |
380 x1i = a[j0 + 1] - a[j1 + 1]; | |
381 x2r = a[j2 + 0] + a[j3 + 0]; | |
382 x2i = a[j2 + 1] + a[j3 + 1]; | |
383 x3r = a[j2 + 0] - a[j3 + 0]; | |
384 x3i = a[j2 + 1] - a[j3 + 1]; | |
385 a[j0 + 0] = x0r + x2r; | |
386 a[j0 + 1] = x0i + x2i; | |
387 x0r -= x2r; | |
388 x0i -= x2i; | |
389 a[j2 + 0] = wk2r * x0r - wk2i * x0i; | |
390 a[j2 + 1] = wk2r * x0i + wk2i * x0r; | |
391 x0r = x1r - x3i; | |
392 x0i = x1i + x3r; | |
393 a[j1 + 0] = wk1r * x0r - wk1i * x0i; | |
394 a[j1 + 1] = wk1r * x0i + wk1i * x0r; | |
395 x0r = x1r + x3i; | |
396 x0i = x1i - x3r; | |
397 a[j3 + 0] = wk3r * x0r - wk3i * x0i; | |
398 a[j3 + 1] = wk3r * x0i + wk3i * x0r; | |
399 } | |
400 wk1r = rdft_w[k2 + 2]; | |
401 wk1i = rdft_w[k2 + 3]; | |
402 wk3r = rdft_wk3ri_second[k1 + 0]; | |
403 wk3i = rdft_wk3ri_second[k1 + 1]; | |
404 for (j0 = k + m; j0 < l + (k + m); j0 += 2) { | |
405 j1 = j0 + 8; | |
406 j2 = j0 + 16; | |
407 j3 = j0 + 24; | |
408 x0r = a[j0 + 0] + a[j1 + 0]; | |
409 x0i = a[j0 + 1] + a[j1 + 1]; | |
410 x1r = a[j0 + 0] - a[j1 + 0]; | |
411 x1i = a[j0 + 1] - a[j1 + 1]; | |
412 x2r = a[j2 + 0] + a[j3 + 0]; | |
413 x2i = a[j2 + 1] + a[j3 + 1]; | |
414 x3r = a[j2 + 0] - a[j3 + 0]; | |
415 x3i = a[j2 + 1] - a[j3 + 1]; | |
416 a[j0 + 0] = x0r + x2r; | |
417 a[j0 + 1] = x0i + x2i; | |
418 x0r -= x2r; | |
419 x0i -= x2i; | |
420 a[j2 + 0] = -wk2i * x0r - wk2r * x0i; | |
421 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; | |
422 x0r = x1r - x3i; | |
423 x0i = x1i + x3r; | |
424 a[j1 + 0] = wk1r * x0r - wk1i * x0i; | |
425 a[j1 + 1] = wk1r * x0i + wk1i * x0r; | |
426 x0r = x1r + x3i; | |
427 x0i = x1i - x3r; | |
428 a[j3 + 0] = wk3r * x0r - wk3i * x0i; | |
429 a[j3 + 1] = wk3r * x0i + wk3i * x0r; | |
430 } | |
431 } | |
432 } | |
433 | |
// Runs the butterfly stages in order: cft1st_128, cftmdl_128, then a final
// pass of twiddle-free 4-point butterflies whose inputs are 32 floats apart.
// The first two stages are reached through the dispatch pointers, so an
// optimized implementation is used when one has been installed.
//
// a: buffer of at least 128 floats; modified in place.
static void cftfsub_128_C(float* a) {
  int j, j1, j2, j3, l;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  cft1st_128(a);
  cftmdl_128(a);
  l = 32;
  for (j = 0; j < l; j += 2) {
    j1 = j + l;
    j2 = j1 + l;
    j3 = j2 + l;
    x0r = a[j] + a[j1];
    x0i = a[j + 1] + a[j1 + 1];
    x1r = a[j] - a[j1];
    x1i = a[j + 1] - a[j1 + 1];
    x2r = a[j2] + a[j3];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2] - a[j3];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j] = x0r + x2r;
    a[j + 1] = x0i + x2i;
    a[j2] = x0r - x2r;
    a[j2 + 1] = x0i - x2i;
    a[j1] = x1r - x3i;
    a[j1 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
  }
}
463 | |
// Counterpart of cftfsub_128_C used by aec_rdft_inverse_128: runs cft1st_128
// and cftmdl_128 through the dispatch pointers, then a final pass of
// twiddle-free 4-point butterflies whose inputs are 32 floats apart. The
// final pass differs from cftfsub_128_C only in the signs applied to the
// imaginary terms.
//
// a: buffer of at least 128 floats; modified in place.
static void cftbsub_128_C(float* a) {
  int j, j1, j2, j3, l;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  cft1st_128(a);
  cftmdl_128(a);
  l = 32;

  for (j = 0; j < l; j += 2) {
    j1 = j + l;
    j2 = j1 + l;
    j3 = j2 + l;
    x0r = a[j] + a[j1];
    x0i = -a[j + 1] - a[j1 + 1];
    x1r = a[j] - a[j1];
    x1i = -a[j + 1] + a[j1 + 1];
    x2r = a[j2] + a[j3];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2] - a[j3];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j] = x0r + x2r;
    a[j + 1] = x0i - x2i;
    a[j2] = x0r - x2r;
    a[j2 + 1] = x0i + x2i;
    a[j1] = x1r - x3i;
    a[j1 + 1] = x1i - x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i + x3r;
  }
}
494 | |
495 static void rftfsub_128_C(float* a) { | |
496 const float* c = rdft_w + 32; | |
497 int j1, j2, k1, k2; | |
498 float wkr, wki, xr, xi, yr, yi; | |
499 | |
500 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { | |
501 k2 = 128 - j2; | |
502 k1 = 32 - j1; | |
503 wkr = 0.5f - c[k1]; | |
504 wki = c[j1]; | |
505 xr = a[j2 + 0] - a[k2 + 0]; | |
506 xi = a[j2 + 1] + a[k2 + 1]; | |
507 yr = wkr * xr - wki * xi; | |
508 yi = wkr * xi + wki * xr; | |
509 a[j2 + 0] -= yr; | |
510 a[j2 + 1] -= yi; | |
511 a[k2 + 0] += yr; | |
512 a[k2 + 1] -= yi; | |
513 } | |
514 } | |
515 | |
516 static void rftbsub_128_C(float* a) { | |
517 const float* c = rdft_w + 32; | |
518 int j1, j2, k1, k2; | |
519 float wkr, wki, xr, xi, yr, yi; | |
520 | |
521 a[1] = -a[1]; | |
522 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { | |
523 k2 = 128 - j2; | |
524 k1 = 32 - j1; | |
525 wkr = 0.5f - c[k1]; | |
526 wki = c[j1]; | |
527 xr = a[j2 + 0] - a[k2 + 0]; | |
528 xi = a[j2 + 1] + a[k2 + 1]; | |
529 yr = wkr * xr + wki * xi; | |
530 yi = wkr * xi - wki * xr; | |
531 a[j2 + 0] = a[j2 + 0] - yr; | |
532 a[j2 + 1] = yi - a[j2 + 1]; | |
533 a[k2 + 0] = yr + a[k2 + 0]; | |
534 a[k2 + 1] = yi - a[k2 + 1]; | |
535 } | |
536 a[65] = -a[65]; | |
537 } | |
538 | |
// Forward 128-point real transform, computed in place.
// Runs bit reversal, the complex butterfly stages and the real-transform
// post-processing through the dispatch pointers, then replaces a[0] with
// a[0] + a[1] and a[1] with a[0] - a[1] (both using the old values).
// aec_rdft_init() assigns the dispatch pointers used here.
//
// a: buffer of at least 128 floats; overwritten with the result.
void aec_rdft_forward_128(float* a) {
  float xi;
  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);
  xi = a[0] - a[1];
  a[0] += a[1];
  a[1] = xi;
}
548 | |
// Inverse 128-point real transform, computed in place.
// First rewrites a[1] as 0.5f * (a[0] - a[1]) and subtracts that new a[1]
// from a[0], then runs the inverse pre-processing, bit reversal and the
// backward butterfly stages through the dispatch pointers.
// aec_rdft_init() assigns the dispatch pointers used here.
// NOTE(review): no output scaling is applied in this function; callers
// presumably normalize the result themselves - confirm.
//
// a: buffer of at least 128 floats; overwritten with the result.
void aec_rdft_inverse_128(float* a) {
  a[1] = 0.5f * (a[0] - a[1]);
  a[0] -= a[1];
  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
556 | |
557 // code path selection | |
558 RftSub128 cft1st_128; | |
559 RftSub128 cftmdl_128; | |
560 RftSub128 rftfsub_128; | |
561 RftSub128 rftbsub_128; | |
562 RftSub128 cftfsub_128; | |
563 RftSub128 cftbsub_128; | |
564 RftSub128 bitrv2_128; | |
565 | |
566 void aec_rdft_init(void) { | |
567 cft1st_128 = cft1st_128_C; | |
568 cftmdl_128 = cftmdl_128_C; | |
569 rftfsub_128 = rftfsub_128_C; | |
570 rftbsub_128 = rftbsub_128_C; | |
571 cftfsub_128 = cftfsub_128_C; | |
572 cftbsub_128 = cftbsub_128_C; | |
573 bitrv2_128 = bitrv2_128_C; | |
574 #if defined(WEBRTC_ARCH_X86_FAMILY) | |
575 if (WebRtc_GetCPUInfo(kSSE2)) { | |
576 aec_rdft_init_sse2(); | |
577 } | |
578 #endif | |
579 #if defined(MIPS_FPU_LE) | |
580 aec_rdft_init_mips(); | |
581 #endif | |
582 #if defined(WEBRTC_HAS_NEON) | |
583 aec_rdft_init_neon(); | |
584 #elif defined(WEBRTC_DETECT_NEON) | |
585 if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { | |
586 aec_rdft_init_neon(); | |
587 } | |
588 #endif | |
589 } | |
OLD | NEW |