Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_mips.c

Issue 1639283002: Clang format changes (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 26 matching lines...) Expand all
37 const float pi2 = 6.28318530717959f; 37 const float pi2 = 6.28318530717959f;
38 const float pi2t = pi2 / 32768; 38 const float pi2t = pi2 / 32768;
39 39
40 // Generate a uniform random array on [0 1] 40 // Generate a uniform random array on [0 1]
41 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); 41 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
42 42
43 int16_t* randWptr = randW16; 43 int16_t* randWptr = randW16;
44 float randTemp, randTemp2, randTemp3, randTemp4; 44 float randTemp, randTemp2, randTemp3, randTemp4;
45 int32_t tmp1s, tmp2s, tmp3s, tmp4s; 45 int32_t tmp1s, tmp2s, tmp3s, tmp4s;
46 46
47 for (i = 0; i < PART_LEN; i+=4) { 47 for (i = 0; i < PART_LEN; i += 4) {
48 __asm __volatile ( 48 __asm __volatile(
49 ".set push \n\t" 49 ".set push \n\t"
50 ".set noreorder \n\t" 50 ".set noreorder \n\t"
51 "lh %[tmp1s], 0(%[randWptr]) \n\t" 51 "lh %[tmp1s], 0(%[randWptr]) \n\t"
52 "lh %[tmp2s], 2(%[randWptr]) \n\t" 52 "lh %[tmp2s], 2(%[randWptr]) \n\t"
53 "lh %[tmp3s], 4(%[randWptr]) \n\t" 53 "lh %[tmp3s], 4(%[randWptr]) \n\t"
54 "lh %[tmp4s], 6(%[randWptr]) \n\t" 54 "lh %[tmp4s], 6(%[randWptr]) \n\t"
55 "mtc1 %[tmp1s], %[randTemp] \n\t" 55 "mtc1 %[tmp1s], %[randTemp] \n\t"
56 "mtc1 %[tmp2s], %[randTemp2] \n\t" 56 "mtc1 %[tmp2s], %[randTemp2] \n\t"
57 "mtc1 %[tmp3s], %[randTemp3] \n\t" 57 "mtc1 %[tmp3s], %[randTemp3] \n\t"
58 "mtc1 %[tmp4s], %[randTemp4] \n\t" 58 "mtc1 %[tmp4s], %[randTemp4] \n\t"
59 "cvt.s.w %[randTemp], %[randTemp] \n\t" 59 "cvt.s.w %[randTemp], %[randTemp] \n\t"
60 "cvt.s.w %[randTemp2], %[randTemp2] \n\t" 60 "cvt.s.w %[randTemp2], %[randTemp2] \n\t"
61 "cvt.s.w %[randTemp3], %[randTemp3] \n\t" 61 "cvt.s.w %[randTemp3], %[randTemp3] \n\t"
62 "cvt.s.w %[randTemp4], %[randTemp4] \n\t" 62 "cvt.s.w %[randTemp4], %[randTemp4] \n\t"
63 "addiu %[randWptr], %[randWptr], 8 \n\t" 63 "addiu %[randWptr], %[randWptr], 8 \n\t"
64 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" 64 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
65 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" 65 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
66 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" 66 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
67 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" 67 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
68 ".set pop \n\t" 68 ".set pop \n\t"
69 : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), 69 : [randWptr] "+r"(randWptr), [randTemp] "=&f"(randTemp),
70 [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), 70 [randTemp2] "=&f"(randTemp2), [randTemp3] "=&f"(randTemp3),
71 [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), 71 [randTemp4] "=&f"(randTemp4), [tmp1s] "=&r"(tmp1s),
72 [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), 72 [tmp2s] "=&r"(tmp2s), [tmp3s] "=&r"(tmp3s), [tmp4s] "=&r"(tmp4s)
73 [tmp4s] "=&r" (tmp4s) 73 : [pi2t] "f"(pi2t)
74 : [pi2t] "f" (pi2t) 74 : "memory");
75 : "memory"
76 );
77 75
78 u[i+1][0] = cosf(randTemp); 76 u[i + 1][0] = cosf(randTemp);
79 u[i+1][1] = sinf(randTemp); 77 u[i + 1][1] = sinf(randTemp);
80 u[i+2][0] = cosf(randTemp2); 78 u[i + 2][0] = cosf(randTemp2);
81 u[i+2][1] = sinf(randTemp2); 79 u[i + 2][1] = sinf(randTemp2);
82 u[i+3][0] = cosf(randTemp3); 80 u[i + 3][0] = cosf(randTemp3);
83 u[i+3][1] = sinf(randTemp3); 81 u[i + 3][1] = sinf(randTemp3);
84 u[i+4][0] = cosf(randTemp4); 82 u[i + 4][0] = cosf(randTemp4);
85 u[i+4][1] = sinf(randTemp4); 83 u[i + 4][1] = sinf(randTemp4);
86 } 84 }
87 85
88 // Reject LF noise 86 // Reject LF noise
89 float* u_ptr = &u[1][0]; 87 float* u_ptr = &u[1][0];
90 float noise2, noise3, noise4; 88 float noise2, noise3, noise4;
91 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; 89 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
92 90
93 u[0][0] = 0; 91 u[0][0] = 0;
94 u[0][1] = 0; 92 u[0][1] = 0;
95 for (i = 1; i < PART_LEN1; i+=4) { 93 for (i = 1; i < PART_LEN1; i += 4) {
96 __asm __volatile ( 94 __asm __volatile(
97 ".set push \n\t" 95 ".set push \n\t"
98 ".set noreorder \n\t" 96 ".set noreorder \n\t"
99 "lwc1 %[noise], 4(%[noisePow]) \n\t" 97 "lwc1 %[noise], 4(%[noisePow]) \n\t"
100 "lwc1 %[noise2], 8(%[noisePow]) \n\t" 98 "lwc1 %[noise2], 8(%[noisePow]) \n\t"
101 "lwc1 %[noise3], 12(%[noisePow]) \n\t" 99 "lwc1 %[noise3], 12(%[noisePow]) \n\t"
102 "lwc1 %[noise4], 16(%[noisePow]) \n\t" 100 "lwc1 %[noise4], 16(%[noisePow]) \n\t"
103 "sqrt.s %[noise], %[noise] \n\t" 101 "sqrt.s %[noise], %[noise] \n\t"
104 "sqrt.s %[noise2], %[noise2] \n\t" 102 "sqrt.s %[noise2], %[noise2] \n\t"
105 "sqrt.s %[noise3], %[noise3] \n\t" 103 "sqrt.s %[noise3], %[noise3] \n\t"
106 "sqrt.s %[noise4], %[noise4] \n\t" 104 "sqrt.s %[noise4], %[noise4] \n\t"
107 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" 105 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
108 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" 106 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
109 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" 107 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
110 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" 108 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
111 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" 109 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
112 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" 110 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
113 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" 111 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
114 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" 112 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
115 "addiu %[noisePow], %[noisePow], 16 \n\t" 113 "addiu %[noisePow], %[noisePow], 16 \n\t"
116 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" 114 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
117 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" 115 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
118 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" 116 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
119 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" 117 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
120 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" 118 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
121 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" 119 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
122 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" 120 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
123 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" 121 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
124 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" 122 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
125 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" 123 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
126 "neg.s %[tmp2f] \n\t" 124 "neg.s %[tmp2f] \n\t"
127 "neg.s %[tmp4f] \n\t" 125 "neg.s %[tmp4f] \n\t"
128 "neg.s %[tmp6f] \n\t" 126 "neg.s %[tmp6f] \n\t"
129 "neg.s %[tmp8f] \n\t" 127 "neg.s %[tmp8f] \n\t"
130 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" 128 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
131 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" 129 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
132 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" 130 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
133 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" 131 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
134 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" 132 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
135 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" 133 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
136 "addiu %[u_ptr], %[u_ptr], 32 \n\t" 134 "addiu %[u_ptr], %[u_ptr], 32 \n\t"
137 ".set pop \n\t" 135 ".set pop \n\t"
138 : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), 136 : [u_ptr] "+r"(u_ptr), [noisePow] "+r"(noisePow), [noise] "=&f"(noise),
139 [noise] "=&f" (noise), [noise2] "=&f" (noise2), 137 [noise2] "=&f"(noise2), [noise3] "=&f"(noise3),
140 [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), 138 [noise4] "=&f"(noise4), [tmp1f] "=&f"(tmp1f), [tmp2f] "=&f"(tmp2f),
141 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), 139 [tmp3f] "=&f"(tmp3f), [tmp4f] "=&f"(tmp4f), [tmp5f] "=&f"(tmp5f),
142 [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), 140 [tmp6f] "=&f"(tmp6f), [tmp7f] "=&f"(tmp7f), [tmp8f] "=&f"(tmp8f)
143 [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), 141 :
144 [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) 142 : "memory");
145 :
146 : "memory"
147 );
148 } 143 }
149 u[PART_LEN][1] = 0; 144 u[PART_LEN][1] = 0;
150 noisePow -= PART_LEN; 145 noisePow -= PART_LEN;
151 146
152 u_ptr = &u[0][0]; 147 u_ptr = &u[0][0];
153 float* u_ptr_end = &u[PART_LEN][0]; 148 float* u_ptr_end = &u[PART_LEN][0];
154 float* efw_ptr_0 = &efw[0][0]; 149 float* efw_ptr_0 = &efw[0][0];
155 float* efw_ptr_1 = &efw[1][0]; 150 float* efw_ptr_1 = &efw[1][0];
156 float tmp9f, tmp10f; 151 float tmp9f, tmp10f;
157 const float tmp1c = 1.0; 152 const float tmp1c = 1.0;
158 153
159 __asm __volatile ( 154 __asm __volatile(
160 ".set push \n\t" 155 ".set push "
161 ".set noreorder \n\t" 156 "\n\t"
162 "1: \n\t" 157 ".set noreorder "
163 "lwc1 %[tmp1f], 0(%[lambda]) \n\t" 158 "\n\t"
164 "lwc1 %[tmp6f], 4(%[lambda]) \n\t" 159 "1: "
165 "addiu %[lambda], %[lambda], 8 \n\t" 160 "\n\t"
166 "c.lt.s %[tmp1f], %[tmp1c] \n\t" 161 "lwc1 %[tmp1f], 0(%[lambda]) "
167 "bc1f 4f \n\t" 162 "\n\t"
168 " nop \n\t" 163 "lwc1 %[tmp6f], 4(%[lambda]) "
169 "c.lt.s %[tmp6f], %[tmp1c] \n\t" 164 "\n\t"
170 "bc1f 3f \n\t" 165 "addiu %[lambda], %[lambda], 8 "
171 " nop \n\t" 166 "\n\t"
172 "2: \n\t" 167 "c.lt.s %[tmp1f], %[tmp1c] "
173 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" 168 "\n\t"
174 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" 169 "bc1f 4f "
175 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" 170 "\n\t"
176 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" 171 " nop "
177 "sqrt.s %[tmp1f], %[tmp1f] \n\t" 172 "\n\t"
178 "sqrt.s %[tmp6f], %[tmp6f] \n\t" 173 "c.lt.s %[tmp6f], %[tmp1c] "
179 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" 174 "\n\t"
180 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" 175 "bc1f 3f "
181 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" 176 "\n\t"
182 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" 177 " nop "
183 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" 178 "\n\t"
184 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" 179 "2: "
185 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" 180 "\n\t"
186 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" 181 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] "
182 "\n\t"
183 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] "
184 "\n\t"
185 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] "
186 "\n\t"
187 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] "
188 "\n\t"
189 "sqrt.s %[tmp1f], %[tmp1f] "
190 "\n\t"
191 "sqrt.s %[tmp6f], %[tmp6f] "
192 "\n\t"
193 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) "
194 "\n\t"
195 "lwc1 %[tmp3f], 0(%[u_ptr]) "
196 "\n\t"
197 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) "
198 "\n\t"
199 "lwc1 %[tmp8f], 8(%[u_ptr]) "
200 "\n\t"
201 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) "
202 "\n\t"
203 "lwc1 %[tmp5f], 4(%[u_ptr]) "
204 "\n\t"
205 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) "
206 "\n\t"
207 "lwc1 %[tmp10f], 12(%[u_ptr]) "
208 "\n\t"
187 #if !defined(MIPS32_R2_LE) 209 #if !defined(MIPS32_R2_LE)
188 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" 210 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] "
189 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" 211 "\n\t"
190 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" 212 "add.s %[tmp2f], %[tmp2f], %[tmp3f] "
191 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" 213 "\n\t"
192 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" 214 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] "
193 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" 215 "\n\t"
194 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" 216 "add.s %[tmp4f], %[tmp4f], %[tmp3f] "
195 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" 217 "\n\t"
196 #else // #if !defined(MIPS32_R2_LE) 218 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] "
197 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" 219 "\n\t"
198 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" 220 "add.s %[tmp7f], %[tmp7f], %[tmp3f] "
199 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" 221 "\n\t"
200 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" 222 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] "
201 #endif // #if !defined(MIPS32_R2_LE) 223 "\n\t"
202 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" 224 "add.s %[tmp9f], %[tmp9f], %[tmp3f] "
203 "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" 225 "\n\t"
204 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" 226 #else // #if !defined(MIPS32_R2_LE)
205 "b 5f \n\t" 227 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] "
206 " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" 228 "\n\t"
207 "3: \n\t" 229 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] "
208 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" 230 "\n\t"
209 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" 231 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] "
210 "sqrt.s %[tmp1f], %[tmp1f] \n\t" 232 "\n\t"
211 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" 233 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] "
212 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" 234 "\n\t"
213 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" 235 #endif // #if !defined(MIPS32_R2_LE)
214 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" 236 "swc1 %[tmp2f], 0(%[efw_ptr_0]) "
237 "\n\t"
238 "swc1 %[tmp4f], 0(%[efw_ptr_1]) "
239 "\n\t"
240 "swc1 %[tmp7f], 4(%[efw_ptr_0]) "
241 "\n\t"
242 "b 5f "
243 "\n\t"
244 " swc1 %[tmp9f], 4(%[efw_ptr_1]) "
245 "\n\t"
246 "3: "
247 "\n\t"
248 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] "
249 "\n\t"
250 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] "
251 "\n\t"
252 "sqrt.s %[tmp1f], %[tmp1f] "
253 "\n\t"
254 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) "
255 "\n\t"
256 "lwc1 %[tmp3f], 0(%[u_ptr]) "
257 "\n\t"
258 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) "
259 "\n\t"
260 "lwc1 %[tmp5f], 4(%[u_ptr]) "
261 "\n\t"
215 #if !defined(MIPS32_R2_LE) 262 #if !defined(MIPS32_R2_LE)
216 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" 263 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] "
217 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" 264 "\n\t"
218 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" 265 "add.s %[tmp2f], %[tmp2f], %[tmp3f] "
219 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" 266 "\n\t"
220 #else // #if !defined(MIPS32_R2_LE) 267 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] "
221 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" 268 "\n\t"
222 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" 269 "add.s %[tmp4f], %[tmp4f], %[tmp3f] "
223 #endif // #if !defined(MIPS32_R2_LE) 270 "\n\t"
224 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" 271 #else // #if !defined(MIPS32_R2_LE)
225 "b 5f \n\t" 272 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] "
226 " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" 273 "\n\t"
227 "4: \n\t" 274 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] "
228 "c.lt.s %[tmp6f], %[tmp1c] \n\t" 275 "\n\t"
229 "bc1f 5f \n\t" 276 #endif // #if !defined(MIPS32_R2_LE)
230 " nop \n\t" 277 "swc1 %[tmp2f], 0(%[efw_ptr_0]) "
231 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" 278 "\n\t"
232 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" 279 "b 5f "
233 "sqrt.s %[tmp6f], %[tmp6f] \n\t" 280 "\n\t"
234 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" 281 " swc1 %[tmp4f], 0(%[efw_ptr_1]) "
235 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" 282 "\n\t"
236 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" 283 "4: "
237 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" 284 "\n\t"
285 "c.lt.s %[tmp6f], %[tmp1c] "
286 "\n\t"
287 "bc1f 5f "
288 "\n\t"
289 " nop "
290 "\n\t"
291 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] "
292 "\n\t"
293 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] "
294 "\n\t"
295 "sqrt.s %[tmp6f], %[tmp6f] "
296 "\n\t"
297 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) "
298 "\n\t"
299 "lwc1 %[tmp8f], 8(%[u_ptr]) "
300 "\n\t"
301 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) "
302 "\n\t"
303 "lwc1 %[tmp10f], 12(%[u_ptr]) "
304 "\n\t"
238 #if !defined(MIPS32_R2_LE) 305 #if !defined(MIPS32_R2_LE)
239 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" 306 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] "
240 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" 307 "\n\t"
241 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" 308 "add.s %[tmp7f], %[tmp7f], %[tmp3f] "
242 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" 309 "\n\t"
243 #else // #if !defined(MIPS32_R2_LE) 310 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] "
244 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" 311 "\n\t"
245 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" 312 "add.s %[tmp9f], %[tmp9f], %[tmp3f] "
246 #endif // #if !defined(MIPS32_R2_LE) 313 "\n\t"
247 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" 314 #else // #if !defined(MIPS32_R2_LE)
248 "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" 315 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] "
249 "5: \n\t" 316 "\n\t"
250 "addiu %[u_ptr], %[u_ptr], 16 \n\t" 317 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] "
251 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" 318 "\n\t"
252 "bne %[u_ptr], %[u_ptr_end], 1b \n\t" 319 #endif // #if !defined(MIPS32_R2_LE)
253 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" 320 "swc1 %[tmp7f], 4(%[efw_ptr_0]) "
254 ".set pop \n\t" 321 "\n\t"
255 : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), 322 "swc1 %[tmp9f], 4(%[efw_ptr_1]) "
256 [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), 323 "\n\t"
257 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), 324 "5: "
258 [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), 325 "\n\t"
259 [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), 326 "addiu %[u_ptr], %[u_ptr], 16 "
260 [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) 327 "\n\t"
261 : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) 328 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 "
262 : "memory" 329 "\n\t"
263 ); 330 "bne %[u_ptr], %[u_ptr_end], 1b "
331 "\n\t"
332 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 "
333 "\n\t"
334 ".set pop "
335 "\n\t"
336 : [lambda] "+r"(lambda), [u_ptr] "+r"(u_ptr), [efw_ptr_0] "+r"(efw_ptr_0),
337 [efw_ptr_1] "+r"(efw_ptr_1), [tmp1f] "=&f"(tmp1f), [tmp2f] "=&f"(tmp2f),
338 [tmp3f] "=&f"(tmp3f), [tmp4f] "=&f"(tmp4f), [tmp5f] "=&f"(tmp5f),
339 [tmp6f] "=&f"(tmp6f), [tmp7f] "=&f"(tmp7f), [tmp8f] "=&f"(tmp8f),
340 [tmp9f] "=&f"(tmp9f), [tmp10f] "=&f"(tmp10f)
341 : [tmp1c] "f"(tmp1c), [u_ptr_end] "r"(u_ptr_end)
342 : "memory");
kwiberg-webrtc 2016/01/27 14:08:21 Ouch. May I ask for a follow-up CL that makes sure…
264 343
265 lambda -= PART_LEN; 344 lambda -= PART_LEN;
266 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); 345 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
267 //tmp = 1 - lambda[i]; 346 // tmp = 1 - lambda[i];
268 efw[0][PART_LEN] += tmp * u[PART_LEN][0]; 347 efw[0][PART_LEN] += tmp * u[PART_LEN][0];
269 efw[1][PART_LEN] += tmp * u[PART_LEN][1]; 348 efw[1][PART_LEN] += tmp * u[PART_LEN][1];
270 349
271 // For H band comfort noise 350 // For H band comfort noise
272 // TODO: don't compute noise and "tmp" twice. Use the previous results. 351 // TODO: don't compute noise and "tmp" twice. Use the previous results.
273 noiseAvg = 0.0; 352 noiseAvg = 0.0;
274 tmpAvg = 0.0; 353 tmpAvg = 0.0;
275 num = 0; 354 num = 0;
276 if (aec->num_bands > 1) { 355 if (aec->num_bands > 1) {
277 for (i = 0; i < PART_LEN; i++) { 356 for (i = 0; i < PART_LEN; i++) {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 int num_partitions, 405 int num_partitions,
327 int x_fft_buf_block_pos, 406 int x_fft_buf_block_pos,
328 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 407 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
329 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 408 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
330 float y_fft[2][PART_LEN1]) { 409 float y_fft[2][PART_LEN1]) {
331 int i; 410 int i;
332 for (i = 0; i < num_partitions; i++) { 411 for (i = 0; i < num_partitions; i++) {
333 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; 412 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
334 int pos = i * PART_LEN1; 413 int pos = i * PART_LEN1;
335 // Check for wrap 414 // Check for wrap
336 if (i + x_fft_buf_block_pos >= num_partitions) { 415 if (i + x_fft_buf_block_pos >= num_partitions) {
337 xPos -= num_partitions * (PART_LEN1); 416 xPos -= num_partitions * (PART_LEN1);
338 } 417 }
339 float* yf0 = y_fft[0]; 418 float* yf0 = y_fft[0];
340 float* yf1 = y_fft[1]; 419 float* yf1 = y_fft[1];
341 float* aRe = x_fft_buf[0] + xPos; 420 float* aRe = x_fft_buf[0] + xPos;
342 float* aIm = x_fft_buf[1] + xPos; 421 float* aIm = x_fft_buf[1] + xPos;
343 float* bRe = h_fft_buf[0] + pos; 422 float* bRe = h_fft_buf[0] + pos;
344 float* bIm = h_fft_buf[1] + pos; 423 float* bIm = h_fft_buf[1] + pos;
345 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; 424 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
346 int len = PART_LEN1 >> 1; 425 int len = PART_LEN1 >> 1;
347 426
348 __asm __volatile ( 427 __asm __volatile(
349 ".set push \n\t" 428 ".set push \n\t"
350 ".set noreorder \n\t" 429 ".set noreorder \n\t"
351 "1: \n\t" 430 "1: \n\t"
352 "lwc1 %[f0], 0(%[aRe]) \n\t" 431 "lwc1 %[f0], 0(%[aRe]) \n\t"
353 "lwc1 %[f1], 0(%[bRe]) \n\t" 432 "lwc1 %[f1], 0(%[bRe]) \n\t"
354 "lwc1 %[f2], 0(%[bIm]) \n\t" 433 "lwc1 %[f2], 0(%[bIm]) \n\t"
355 "lwc1 %[f3], 0(%[aIm]) \n\t" 434 "lwc1 %[f3], 0(%[aIm]) \n\t"
356 "lwc1 %[f4], 4(%[aRe]) \n\t" 435 "lwc1 %[f4], 4(%[aRe]) \n\t"
357 "lwc1 %[f5], 4(%[bRe]) \n\t" 436 "lwc1 %[f5], 4(%[bRe]) \n\t"
358 "lwc1 %[f6], 4(%[bIm]) \n\t" 437 "lwc1 %[f6], 4(%[bIm]) \n\t"
359 "mul.s %[f8], %[f0], %[f1] \n\t" 438 "mul.s %[f8], %[f0], %[f1] \n\t"
360 "mul.s %[f0], %[f0], %[f2] \n\t" 439 "mul.s %[f0], %[f0], %[f2] \n\t"
361 "mul.s %[f9], %[f4], %[f5] \n\t" 440 "mul.s %[f9], %[f4], %[f5] \n\t"
362 "mul.s %[f4], %[f4], %[f6] \n\t" 441 "mul.s %[f4], %[f4], %[f6] \n\t"
363 "lwc1 %[f7], 4(%[aIm]) \n\t" 442 "lwc1 %[f7], 4(%[aIm]) \n\t"
364 #if !defined(MIPS32_R2_LE) 443 #if !defined(MIPS32_R2_LE)
365 "mul.s %[f12], %[f2], %[f3] \n\t" 444 "mul.s %[f12], %[f2], %[f3] \n\t"
366 "mul.s %[f1], %[f3], %[f1] \n\t" 445 "mul.s %[f1], %[f3], %[f1] \n\t"
367 "mul.s %[f11], %[f6], %[f7] \n\t" 446 "mul.s %[f11], %[f6], %[f7] \n\t"
368 "addiu %[aRe], %[aRe], 8 \n\t" 447 "addiu %[aRe], %[aRe], 8 \n\t"
369 "addiu %[aIm], %[aIm], 8 \n\t" 448 "addiu %[aIm], %[aIm], 8 \n\t"
370 "addiu %[len], %[len], -1 \n\t" 449 "addiu %[len], %[len], -1 \n\t"
371 "sub.s %[f8], %[f8], %[f12] \n\t" 450 "sub.s %[f8], %[f8], %[f12] \n\t"
372 "mul.s %[f12], %[f7], %[f5] \n\t" 451 "mul.s %[f12], %[f7], %[f5] \n\t"
373 "lwc1 %[f2], 0(%[yf0]) \n\t" 452 "lwc1 %[f2], 0(%[yf0]) \n\t"
374 "add.s %[f1], %[f0], %[f1] \n\t" 453 "add.s %[f1], %[f0], %[f1] \n\t"
375 "lwc1 %[f3], 0(%[yf1]) \n\t" 454 "lwc1 %[f3], 0(%[yf1]) \n\t"
376 "sub.s %[f9], %[f9], %[f11] \n\t" 455 "sub.s %[f9], %[f9], %[f11] \n\t"
377 "lwc1 %[f6], 4(%[yf0]) \n\t" 456 "lwc1 %[f6], 4(%[yf0]) \n\t"
378 "add.s %[f4], %[f4], %[f12] \n\t" 457 "add.s %[f4], %[f4], %[f12] \n\t"
379 #else // #if !defined(MIPS32_R2_LE) 458 #else // #if !defined(MIPS32_R2_LE)
380 "addiu %[aRe], %[aRe], 8 \n\t" 459 "addiu %[aRe], %[aRe], 8 \n\t"
381 "addiu %[aIm], %[aIm], 8 \n\t" 460 "addiu %[aIm], %[aIm], 8 \n\t"
382 "addiu %[len], %[len], -1 \n\t" 461 "addiu %[len], %[len], -1 \n\t"
383 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" 462 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
384 "lwc1 %[f2], 0(%[yf0]) \n\t" 463 "lwc1 %[f2], 0(%[yf0]) \n\t"
385 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" 464 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
386 "lwc1 %[f3], 0(%[yf1]) \n\t" 465 "lwc1 %[f3], 0(%[yf1]) \n\t"
387 "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" 466 "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
388 "lwc1 %[f6], 4(%[yf0]) \n\t" 467 "lwc1 %[f6], 4(%[yf0]) \n\t"
389 "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" 468 "madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
390 #endif // #if !defined(MIPS32_R2_LE) 469 #endif // #if !defined(MIPS32_R2_LE)
391 "lwc1 %[f5], 4(%[yf1]) \n\t" 470 "lwc1 %[f5], 4(%[yf1]) \n\t"
392 "add.s %[f2], %[f2], %[f8] \n\t" 471 "add.s %[f2], %[f2], %[f8] \n\t"
393 "addiu %[bRe], %[bRe], 8 \n\t" 472 "addiu %[bRe], %[bRe], 8 \n\t"
394 "addiu %[bIm], %[bIm], 8 \n\t" 473 "addiu %[bIm], %[bIm], 8 \n\t"
395 "add.s %[f3], %[f3], %[f1] \n\t" 474 "add.s %[f3], %[f3], %[f1] \n\t"
396 "add.s %[f6], %[f6], %[f9] \n\t" 475 "add.s %[f6], %[f6], %[f9] \n\t"
397 "add.s %[f5], %[f5], %[f4] \n\t" 476 "add.s %[f5], %[f5], %[f4] \n\t"
398 "swc1 %[f2], 0(%[yf0]) \n\t" 477 "swc1 %[f2], 0(%[yf0]) \n\t"
399 "swc1 %[f3], 0(%[yf1]) \n\t" 478 "swc1 %[f3], 0(%[yf1]) \n\t"
400 "swc1 %[f6], 4(%[yf0]) \n\t" 479 "swc1 %[f6], 4(%[yf0]) \n\t"
401 "swc1 %[f5], 4(%[yf1]) \n\t" 480 "swc1 %[f5], 4(%[yf1]) \n\t"
402 "addiu %[yf0], %[yf0], 8 \n\t" 481 "addiu %[yf0], %[yf0], 8 \n\t"
403 "bgtz %[len], 1b \n\t" 482 "bgtz %[len], 1b \n\t"
404 " addiu %[yf1], %[yf1], 8 \n\t" 483 " addiu %[yf1], %[yf1], 8 \n\t"
405 "lwc1 %[f0], 0(%[aRe]) \n\t" 484 "lwc1 %[f0], 0(%[aRe]) \n\t"
406 "lwc1 %[f1], 0(%[bRe]) \n\t" 485 "lwc1 %[f1], 0(%[bRe]) \n\t"
407 "lwc1 %[f2], 0(%[bIm]) \n\t" 486 "lwc1 %[f2], 0(%[bIm]) \n\t"
408 "lwc1 %[f3], 0(%[aIm]) \n\t" 487 "lwc1 %[f3], 0(%[aIm]) \n\t"
409 "mul.s %[f8], %[f0], %[f1] \n\t" 488 "mul.s %[f8], %[f0], %[f1] \n\t"
410 "mul.s %[f0], %[f0], %[f2] \n\t" 489 "mul.s %[f0], %[f0], %[f2] \n\t"
411 #if !defined(MIPS32_R2_LE) 490 #if !defined(MIPS32_R2_LE)
412 "mul.s %[f12], %[f2], %[f3] \n\t" 491 "mul.s %[f12], %[f2], %[f3] \n\t"
413 "mul.s %[f1], %[f3], %[f1] \n\t" 492 "mul.s %[f1], %[f3], %[f1] \n\t"
414 "sub.s %[f8], %[f8], %[f12] \n\t" 493 "sub.s %[f8], %[f8], %[f12] \n\t"
415 "lwc1 %[f2], 0(%[yf0]) \n\t" 494 "lwc1 %[f2], 0(%[yf0]) \n\t"
416 "add.s %[f1], %[f0], %[f1] \n\t" 495 "add.s %[f1], %[f0], %[f1] \n\t"
417 "lwc1 %[f3], 0(%[yf1]) \n\t" 496 "lwc1 %[f3], 0(%[yf1]) \n\t"
418 #else // #if !defined(MIPS32_R2_LE) 497 #else // #if !defined(MIPS32_R2_LE)
419 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" 498 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
420 "lwc1 %[f2], 0(%[yf0]) \n\t" 499 "lwc1 %[f2], 0(%[yf0]) \n\t"
421 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" 500 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
422 "lwc1 %[f3], 0(%[yf1]) \n\t" 501 "lwc1 %[f3], 0(%[yf1]) \n\t"
423 #endif // #if !defined(MIPS32_R2_LE) 502 #endif // #if !defined(MIPS32_R2_LE)
424 "add.s %[f2], %[f2], %[f8] \n\t" 503 "add.s %[f2], %[f2], %[f8] \n\t"
425 "add.s %[f3], %[f3], %[f1] \n\t" 504 "add.s %[f3], %[f3], %[f1] \n\t"
426 "swc1 %[f2], 0(%[yf0]) \n\t" 505 "swc1 %[f2], 0(%[yf0]) \n\t"
427 "swc1 %[f3], 0(%[yf1]) \n\t" 506 "swc1 %[f3], 0(%[yf1]) \n\t"
428 ".set pop \n\t" 507 ".set pop \n\t"
429 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), 508 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
430 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), 509 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
431 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), 510 [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11),
432 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), 511 [f12] "=&f"(f12), [f13] "=&f"(f13), [aRe] "+r"(aRe), [aIm] "+r"(aIm),
433 [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), 512 [bRe] "+r"(bRe), [bIm] "+r"(bIm), [yf0] "+r"(yf0), [yf1] "+r"(yf1),
434 [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), 513 [len] "+r"(len)
435 [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) 514 :
436 : 515 : "memory");
437 : "memory"
438 );
439 } 516 }
440 } 517 }
441 518
442 void WebRtcAec_FilterAdaptation_mips( 519 void WebRtcAec_FilterAdaptation_mips(
443 int num_partitions, 520 int num_partitions,
444 int x_fft_buf_block_pos, 521 int x_fft_buf_block_pos,
445 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 522 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
446 float e_fft[2][PART_LEN1], 523 float e_fft[2][PART_LEN1],
447 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { 524 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
448 float fft[PART_LEN2]; 525 float fft[PART_LEN2];
449 int i; 526 int i;
450 for (i = 0; i < num_partitions; i++) { 527 for (i = 0; i < num_partitions; i++) {
451 int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1); 528 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
452 int pos; 529 int pos;
453 // Check for wrap 530 // Check for wrap
454 if (i + x_fft_buf_block_pos >= num_partitions) { 531 if (i + x_fft_buf_block_pos >= num_partitions) {
455 xPos -= num_partitions * PART_LEN1; 532 xPos -= num_partitions * PART_LEN1;
456 } 533 }
457 534
458 pos = i * PART_LEN1; 535 pos = i * PART_LEN1;
459 float* aRe = x_fft_buf[0] + xPos; 536 float* aRe = x_fft_buf[0] + xPos;
460 float* aIm = x_fft_buf[1] + xPos; 537 float* aIm = x_fft_buf[1] + xPos;
461 float* bRe = e_fft[0]; 538 float* bRe = e_fft[0];
462 float* bIm = e_fft[1]; 539 float* bIm = e_fft[1];
463 float* fft_tmp; 540 float* fft_tmp;
464 541
465 float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; 542 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12;
466 int len = PART_LEN >> 1; 543 int len = PART_LEN >> 1;
467 544
468 __asm __volatile ( 545 __asm __volatile(
469 ".set push \n\t" 546 ".set push \n\t"
470 ".set noreorder \n\t" 547 ".set noreorder \n\t"
471 "addiu %[fft_tmp], %[fft], 0 \n\t" 548 "addiu %[fft_tmp], %[fft], 0 \n\t"
472 "1: \n\t" 549 "1: \n\t"
473 "lwc1 %[f0], 0(%[aRe]) \n\t" 550 "lwc1 %[f0], 0(%[aRe]) \n\t"
474 "lwc1 %[f1], 0(%[bRe]) \n\t" 551 "lwc1 %[f1], 0(%[bRe]) \n\t"
475 "lwc1 %[f2], 0(%[bIm]) \n\t" 552 "lwc1 %[f2], 0(%[bIm]) \n\t"
476 "lwc1 %[f4], 4(%[aRe]) \n\t" 553 "lwc1 %[f4], 4(%[aRe]) \n\t"
477 "lwc1 %[f5], 4(%[bRe]) \n\t" 554 "lwc1 %[f5], 4(%[bRe]) \n\t"
478 "lwc1 %[f6], 4(%[bIm]) \n\t" 555 "lwc1 %[f6], 4(%[bIm]) \n\t"
479 "addiu %[aRe], %[aRe], 8 \n\t" 556 "addiu %[aRe], %[aRe], 8 \n\t"
480 "addiu %[bRe], %[bRe], 8 \n\t" 557 "addiu %[bRe], %[bRe], 8 \n\t"
481 "mul.s %[f8], %[f0], %[f1] \n\t" 558 "mul.s %[f8], %[f0], %[f1] \n\t"
482 "mul.s %[f0], %[f0], %[f2] \n\t" 559 "mul.s %[f0], %[f0], %[f2] \n\t"
483 "lwc1 %[f3], 0(%[aIm]) \n\t" 560 "lwc1 %[f3], 0(%[aIm]) \n\t"
484 "mul.s %[f9], %[f4], %[f5] \n\t" 561 "mul.s %[f9], %[f4], %[f5] \n\t"
485 "lwc1 %[f7], 4(%[aIm]) \n\t" 562 "lwc1 %[f7], 4(%[aIm]) \n\t"
486 "mul.s %[f4], %[f4], %[f6] \n\t" 563 "mul.s %[f4], %[f4], %[f6] \n\t"
487 #if !defined(MIPS32_R2_LE) 564 #if !defined(MIPS32_R2_LE)
488 "mul.s %[f10], %[f3], %[f2] \n\t" 565 "mul.s %[f10], %[f3], %[f2] \n\t"
489 "mul.s %[f1], %[f3], %[f1] \n\t" 566 "mul.s %[f1], %[f3], %[f1] \n\t"
490 "mul.s %[f11], %[f7], %[f6] \n\t" 567 "mul.s %[f11], %[f7], %[f6] \n\t"
491 "mul.s %[f5], %[f7], %[f5] \n\t" 568 "mul.s %[f5], %[f7], %[f5] \n\t"
492 "addiu %[aIm], %[aIm], 8 \n\t" 569 "addiu %[aIm], %[aIm], 8 \n\t"
493 "addiu %[bIm], %[bIm], 8 \n\t" 570 "addiu %[bIm], %[bIm], 8 \n\t"
494 "addiu %[len], %[len], -1 \n\t" 571 "addiu %[len], %[len], -1 \n\t"
495 "add.s %[f8], %[f8], %[f10] \n\t" 572 "add.s %[f8], %[f8], %[f10] \n\t"
496 "sub.s %[f1], %[f0], %[f1] \n\t" 573 "sub.s %[f1], %[f0], %[f1] \n\t"
497 "add.s %[f9], %[f9], %[f11] \n\t" 574 "add.s %[f9], %[f9], %[f11] \n\t"
498 "sub.s %[f5], %[f4], %[f5] \n\t" 575 "sub.s %[f5], %[f4], %[f5] \n\t"
499 #else // #if !defined(MIPS32_R2_LE) 576 #else // #if !defined(MIPS32_R2_LE)
500 "addiu %[aIm], %[aIm], 8 \n\t" 577 "addiu %[aIm], %[aIm], 8 \n\t"
501 "addiu %[bIm], %[bIm], 8 \n\t" 578 "addiu %[bIm], %[bIm], 8 \n\t"
502 "addiu %[len], %[len], -1 \n\t" 579 "addiu %[len], %[len], -1 \n\t"
503 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" 580 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
504 "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" 581 "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"
505 "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" 582 "madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
506 "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" 583 "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
507 #endif // #if !defined(MIPS32_R2_LE) 584 #endif // #if !defined(MIPS32_R2_LE)
508 "swc1 %[f8], 0(%[fft_tmp]) \n\t" 585 "swc1 %[f8], 0(%[fft_tmp]) \n\t"
509 "swc1 %[f1], 4(%[fft_tmp]) \n\t" 586 "swc1 %[f1], 4(%[fft_tmp]) \n\t"
510 "swc1 %[f9], 8(%[fft_tmp]) \n\t" 587 "swc1 %[f9], 8(%[fft_tmp]) \n\t"
511 "swc1 %[f5], 12(%[fft_tmp]) \n\t" 588 "swc1 %[f5], 12(%[fft_tmp]) \n\t"
512 "bgtz %[len], 1b \n\t" 589 "bgtz %[len], 1b \n\t"
513 " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" 590 " addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
514 "lwc1 %[f0], 0(%[aRe]) \n\t" 591 "lwc1 %[f0], 0(%[aRe]) \n\t"
515 "lwc1 %[f1], 0(%[bRe]) \n\t" 592 "lwc1 %[f1], 0(%[bRe]) \n\t"
516 "lwc1 %[f2], 0(%[bIm]) \n\t" 593 "lwc1 %[f2], 0(%[bIm]) \n\t"
517 "lwc1 %[f3], 0(%[aIm]) \n\t" 594 "lwc1 %[f3], 0(%[aIm]) \n\t"
518 "mul.s %[f8], %[f0], %[f1] \n\t" 595 "mul.s %[f8], %[f0], %[f1] \n\t"
519 #if !defined(MIPS32_R2_LE) 596 #if !defined(MIPS32_R2_LE)
520 "mul.s %[f10], %[f3], %[f2] \n\t" 597 "mul.s %[f10], %[f3], %[f2] \n\t"
521 "add.s %[f8], %[f8], %[f10] \n\t" 598 "add.s %[f8], %[f8], %[f10] \n\t"
522 #else // #if !defined(MIPS32_R2_LE) 599 #else // #if !defined(MIPS32_R2_LE)
523 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" 600 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
524 #endif // #if !defined(MIPS32_R2_LE) 601 #endif // #if !defined(MIPS32_R2_LE)
525 "swc1 %[f8], 4(%[fft]) \n\t" 602 "swc1 %[f8], 4(%[fft]) \n\t"
526 ".set pop \n\t" 603 ".set pop \n\t"
527 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), 604 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
528 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), 605 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
529 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), 606 [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11),
530 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), 607 [f12] "=&f"(f12), [aRe] "+r"(aRe), [aIm] "+r"(aIm), [bRe] "+r"(bRe),
531 [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), 608 [bIm] "+r"(bIm), [fft_tmp] "=&r"(fft_tmp), [len] "+r"(len)
532 [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), 609 : [fft] "r"(fft)
533 [len] "+r" (len) 610 : "memory");
534 : [fft] "r" (fft)
535 : "memory"
536 );
537 611
538 aec_rdft_inverse_128(fft); 612 aec_rdft_inverse_128(fft);
539 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 613 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
540 614
541 // fft scaling 615 // fft scaling
542 { 616 {
543 float scale = 2.0f / PART_LEN2; 617 float scale = 2.0f / PART_LEN2;
544 __asm __volatile ( 618 __asm __volatile(
545 ".set push \n\t" 619 ".set push \n\t"
546 ".set noreorder \n\t" 620 ".set noreorder \n\t"
547 "addiu %[fft_tmp], %[fft], 0 \n\t" 621 "addiu %[fft_tmp], %[fft], 0 \n\t"
548 "addiu %[len], $zero, 8 \n\t" 622 "addiu %[len], $zero, 8 \n\t"
549 "1: \n\t" 623 "1: \n\t"
550 "addiu %[len], %[len], -1 \n\t" 624 "addiu %[len], %[len], -1 \n\t"
551 "lwc1 %[f0], 0(%[fft_tmp]) \n\t" 625 "lwc1 %[f0], 0(%[fft_tmp]) \n\t"
552 "lwc1 %[f1], 4(%[fft_tmp]) \n\t" 626 "lwc1 %[f1], 4(%[fft_tmp]) \n\t"
553 "lwc1 %[f2], 8(%[fft_tmp]) \n\t" 627 "lwc1 %[f2], 8(%[fft_tmp]) \n\t"
554 "lwc1 %[f3], 12(%[fft_tmp]) \n\t" 628 "lwc1 %[f3], 12(%[fft_tmp]) \n\t"
555 "mul.s %[f0], %[f0], %[scale] \n\t" 629 "mul.s %[f0], %[f0], %[scale] \n\t"
556 "mul.s %[f1], %[f1], %[scale] \n\t" 630 "mul.s %[f1], %[f1], %[scale] \n\t"
557 "mul.s %[f2], %[f2], %[scale] \n\t" 631 "mul.s %[f2], %[f2], %[scale] \n\t"
558 "mul.s %[f3], %[f3], %[scale] \n\t" 632 "mul.s %[f3], %[f3], %[scale] \n\t"
559 "lwc1 %[f4], 16(%[fft_tmp]) \n\t" 633 "lwc1 %[f4], 16(%[fft_tmp]) \n\t"
560 "lwc1 %[f5], 20(%[fft_tmp]) \n\t" 634 "lwc1 %[f5], 20(%[fft_tmp]) \n\t"
561 "lwc1 %[f6], 24(%[fft_tmp]) \n\t" 635 "lwc1 %[f6], 24(%[fft_tmp]) \n\t"
562 "lwc1 %[f7], 28(%[fft_tmp]) \n\t" 636 "lwc1 %[f7], 28(%[fft_tmp]) \n\t"
563 "mul.s %[f4], %[f4], %[scale] \n\t" 637 "mul.s %[f4], %[f4], %[scale] \n\t"
564 "mul.s %[f5], %[f5], %[scale] \n\t" 638 "mul.s %[f5], %[f5], %[scale] \n\t"
565 "mul.s %[f6], %[f6], %[scale] \n\t" 639 "mul.s %[f6], %[f6], %[scale] \n\t"
566 "mul.s %[f7], %[f7], %[scale] \n\t" 640 "mul.s %[f7], %[f7], %[scale] \n\t"
567 "swc1 %[f0], 0(%[fft_tmp]) \n\t" 641 "swc1 %[f0], 0(%[fft_tmp]) \n\t"
568 "swc1 %[f1], 4(%[fft_tmp]) \n\t" 642 "swc1 %[f1], 4(%[fft_tmp]) \n\t"
569 "swc1 %[f2], 8(%[fft_tmp]) \n\t" 643 "swc1 %[f2], 8(%[fft_tmp]) \n\t"
570 "swc1 %[f3], 12(%[fft_tmp]) \n\t" 644 "swc1 %[f3], 12(%[fft_tmp]) \n\t"
571 "swc1 %[f4], 16(%[fft_tmp]) \n\t" 645 "swc1 %[f4], 16(%[fft_tmp]) \n\t"
572 "swc1 %[f5], 20(%[fft_tmp]) \n\t" 646 "swc1 %[f5], 20(%[fft_tmp]) \n\t"
573 "swc1 %[f6], 24(%[fft_tmp]) \n\t" 647 "swc1 %[f6], 24(%[fft_tmp]) \n\t"
574 "swc1 %[f7], 28(%[fft_tmp]) \n\t" 648 "swc1 %[f7], 28(%[fft_tmp]) \n\t"
575 "bgtz %[len], 1b \n\t" 649 "bgtz %[len], 1b \n\t"
576 " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" 650 " addiu %[fft_tmp], %[fft_tmp], 32 \n\t"
577 ".set pop \n\t" 651 ".set pop \n\t"
578 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), 652 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
579 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), 653 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
580 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), 654 [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp)
581 [fft_tmp] "=&r" (fft_tmp) 655 : [scale] "f"(scale), [fft] "r"(fft)
582 : [scale] "f" (scale), [fft] "r" (fft) 656 : "memory");
583 : "memory"
584 );
585 } 657 }
586 aec_rdft_forward_128(fft); 658 aec_rdft_forward_128(fft);
587 aRe = h_fft_buf[0] + pos; 659 aRe = h_fft_buf[0] + pos;
588 aIm = h_fft_buf[1] + pos; 660 aIm = h_fft_buf[1] + pos;
589 __asm __volatile ( 661 __asm __volatile(
590 ".set push \n\t" 662 ".set push \n\t"
591 ".set noreorder \n\t" 663 ".set noreorder \n\t"
592 "addiu %[fft_tmp], %[fft], 0 \n\t" 664 "addiu %[fft_tmp], %[fft], 0 \n\t"
593 "addiu %[len], $zero, 31 \n\t" 665 "addiu %[len], $zero, 31 \n\t"
594 "lwc1 %[f0], 0(%[aRe]) \n\t" 666 "lwc1 %[f0], 0(%[aRe]) \n\t"
595 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" 667 "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
596 "lwc1 %[f2], 256(%[aRe]) \n\t" 668 "lwc1 %[f2], 256(%[aRe]) \n\t"
597 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" 669 "lwc1 %[f3], 4(%[fft_tmp]) \n\t"
598 "lwc1 %[f4], 4(%[aRe]) \n\t" 670 "lwc1 %[f4], 4(%[aRe]) \n\t"
599 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" 671 "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
600 "lwc1 %[f6], 4(%[aIm]) \n\t" 672 "lwc1 %[f6], 4(%[aIm]) \n\t"
601 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" 673 "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
602 "add.s %[f0], %[f0], %[f1] \n\t" 674 "add.s %[f0], %[f0], %[f1] \n\t"
603 "add.s %[f2], %[f2], %[f3] \n\t" 675 "add.s %[f2], %[f2], %[f3] \n\t"
604 "add.s %[f4], %[f4], %[f5] \n\t" 676 "add.s %[f4], %[f4], %[f5] \n\t"
605 "add.s %[f6], %[f6], %[f7] \n\t" 677 "add.s %[f6], %[f6], %[f7] \n\t"
606 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" 678 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
607 "swc1 %[f0], 0(%[aRe]) \n\t" 679 "swc1 %[f0], 0(%[aRe]) \n\t"
608 "swc1 %[f2], 256(%[aRe]) \n\t" 680 "swc1 %[f2], 256(%[aRe]) \n\t"
609 "swc1 %[f4], 4(%[aRe]) \n\t" 681 "swc1 %[f4], 4(%[aRe]) \n\t"
610 "addiu %[aRe], %[aRe], 8 \n\t" 682 "addiu %[aRe], %[aRe], 8 \n\t"
611 "swc1 %[f6], 4(%[aIm]) \n\t" 683 "swc1 %[f6], 4(%[aIm]) \n\t"
612 "addiu %[aIm], %[aIm], 8 \n\t" 684 "addiu %[aIm], %[aIm], 8 \n\t"
613 "1: \n\t" 685 "1: \n\t"
614 "lwc1 %[f0], 0(%[aRe]) \n\t" 686 "lwc1 %[f0], 0(%[aRe]) \n\t"
615 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" 687 "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
616 "lwc1 %[f2], 0(%[aIm]) \n\t" 688 "lwc1 %[f2], 0(%[aIm]) \n\t"
617 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" 689 "lwc1 %[f3], 4(%[fft_tmp]) \n\t"
618 "lwc1 %[f4], 4(%[aRe]) \n\t" 690 "lwc1 %[f4], 4(%[aRe]) \n\t"
619 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" 691 "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
620 "lwc1 %[f6], 4(%[aIm]) \n\t" 692 "lwc1 %[f6], 4(%[aIm]) \n\t"
621 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" 693 "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
622 "add.s %[f0], %[f0], %[f1] \n\t" 694 "add.s %[f0], %[f0], %[f1] \n\t"
623 "add.s %[f2], %[f2], %[f3] \n\t" 695 "add.s %[f2], %[f2], %[f3] \n\t"
624 "add.s %[f4], %[f4], %[f5] \n\t" 696 "add.s %[f4], %[f4], %[f5] \n\t"
625 "add.s %[f6], %[f6], %[f7] \n\t" 697 "add.s %[f6], %[f6], %[f7] \n\t"
626 "addiu %[len], %[len], -1 \n\t" 698 "addiu %[len], %[len], -1 \n\t"
627 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" 699 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
628 "swc1 %[f0], 0(%[aRe]) \n\t" 700 "swc1 %[f0], 0(%[aRe]) \n\t"
629 "swc1 %[f2], 0(%[aIm]) \n\t" 701 "swc1 %[f2], 0(%[aIm]) \n\t"
630 "swc1 %[f4], 4(%[aRe]) \n\t" 702 "swc1 %[f4], 4(%[aRe]) \n\t"
631 "addiu %[aRe], %[aRe], 8 \n\t" 703 "addiu %[aRe], %[aRe], 8 \n\t"
632 "swc1 %[f6], 4(%[aIm]) \n\t" 704 "swc1 %[f6], 4(%[aIm]) \n\t"
633 "bgtz %[len], 1b \n\t" 705 "bgtz %[len], 1b \n\t"
634 " addiu %[aIm], %[aIm], 8 \n\t" 706 " addiu %[aIm], %[aIm], 8 \n\t"
635 ".set pop \n\t" 707 ".set pop \n\t"
636 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), 708 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
637 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), 709 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
638 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), 710 [len] "=&r"(len), [fft_tmp] "=&r"(fft_tmp), [aRe] "+r"(aRe),
639 [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) 711 [aIm] "+r"(aIm)
640 : [fft] "r" (fft) 712 : [fft] "r"(fft)
641 : "memory" 713 : "memory");
642 );
643 } 714 }
644 } 715 }
645 716
646 void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, 717 void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
647 float hNl[PART_LEN1], 718 float hNl[PART_LEN1],
648 const float hNlFb, 719 const float hNlFb,
649 float efw[2][PART_LEN1]) { 720 float efw[2][PART_LEN1]) {
650 int i; 721 int i;
651 const float one = 1.0; 722 const float one = 1.0;
652 float* p_hNl; 723 float* p_hNl;
653 float* p_efw0; 724 float* p_efw0;
654 float* p_efw1; 725 float* p_efw1;
655 float* p_WebRtcAec_wC; 726 float* p_WebRtcAec_wC;
656 float temp1, temp2, temp3, temp4; 727 float temp1, temp2, temp3, temp4;
657 728
658 p_hNl = &hNl[0]; 729 p_hNl = &hNl[0];
659 p_efw0 = &efw[0][0]; 730 p_efw0 = &efw[0][0];
660 p_efw1 = &efw[1][0]; 731 p_efw1 = &efw[1][0];
661 p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; 732 p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
662 733
663 for (i = 0; i < PART_LEN1; i++) { 734 for (i = 0; i < PART_LEN1; i++) {
664 // Weight subbands 735 // Weight subbands
665 __asm __volatile ( 736 __asm __volatile(
666 ".set push \n\t" 737 ".set push \n\t"
667 ".set noreorder \n\t" 738 ".set noreorder \n\t"
668 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" 739 "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
669 "lwc1 %[temp2], 0(%[p_wC]) \n\t" 740 "lwc1 %[temp2], 0(%[p_wC]) \n\t"
670 "c.lt.s %[hNlFb], %[temp1] \n\t" 741 "c.lt.s %[hNlFb], %[temp1] \n\t"
671 "bc1f 1f \n\t" 742 "bc1f 1f \n\t"
672 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" 743 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
673 "sub.s %[temp4], %[one], %[temp2] \n\t" 744 "sub.s %[temp4], %[one], %[temp2] \n\t"
674 #if !defined(MIPS32_R2_LE) 745 #if !defined(MIPS32_R2_LE)
675 "mul.s %[temp1], %[temp1], %[temp4] \n\t" 746 "mul.s %[temp1], %[temp1], %[temp4] \n\t"
676 "add.s %[temp1], %[temp3], %[temp1] \n\t" 747 "add.s %[temp1], %[temp3], %[temp1] \n\t"
677 #else // #if !defined(MIPS32_R2_LE) 748 #else // #if !defined(MIPS32_R2_LE)
678 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" 749 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
679 #endif // #if !defined(MIPS32_R2_LE) 750 #endif // #if !defined(MIPS32_R2_LE)
680 "swc1 %[temp1], 0(%[p_hNl]) \n\t" 751 "swc1 %[temp1], 0(%[p_hNl]) \n\t"
681 "1: \n\t" 752 "1: \n\t"
682 "addiu %[p_wC], %[p_wC], 4 \n\t" 753 "addiu %[p_wC], %[p_wC], 4 \n\t"
683 ".set pop \n\t" 754 ".set pop \n\t"
684 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), 755 : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3),
685 [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) 756 [temp4] "=&f"(temp4), [p_wC] "+r"(p_WebRtcAec_wC)
686 : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) 757 : [hNlFb] "f"(hNlFb), [one] "f"(one), [p_hNl] "r"(p_hNl)
687 : "memory" 758 : "memory");
688 );
689 759
690 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 760 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
691 761
692 __asm __volatile ( 762 __asm __volatile(
693 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" 763 "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
694 "lwc1 %[temp3], 0(%[p_efw1]) \n\t" 764 "lwc1 %[temp3], 0(%[p_efw1]) \n\t"
695 "lwc1 %[temp2], 0(%[p_efw0]) \n\t" 765 "lwc1 %[temp2], 0(%[p_efw0]) \n\t"
696 "addiu %[p_hNl], %[p_hNl], 4 \n\t" 766 "addiu %[p_hNl], %[p_hNl], 4 \n\t"
697 "mul.s %[temp3], %[temp3], %[temp1] \n\t" 767 "mul.s %[temp3], %[temp3], %[temp1] \n\t"
698 "mul.s %[temp2], %[temp2], %[temp1] \n\t" 768 "mul.s %[temp2], %[temp2], %[temp1] \n\t"
699 "addiu %[p_efw0], %[p_efw0], 4 \n\t" 769 "addiu %[p_efw0], %[p_efw0], 4 \n\t"
700 "addiu %[p_efw1], %[p_efw1], 4 \n\t" 770 "addiu %[p_efw1], %[p_efw1], 4 \n\t"
701 "neg.s %[temp4], %[temp3] \n\t" 771 "neg.s %[temp4], %[temp3] \n\t"
702 "swc1 %[temp2], -4(%[p_efw0]) \n\t" 772 "swc1 %[temp2], -4(%[p_efw0]) \n\t"
703 "swc1 %[temp4], -4(%[p_efw1]) \n\t" 773 "swc1 %[temp4], -4(%[p_efw1]) \n\t"
704 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), 774 : [temp1] "=&f"(temp1), [temp2] "=&f"(temp2), [temp3] "=&f"(temp3),
705 [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), 775 [temp4] "=&f"(temp4), [p_efw0] "+r"(p_efw0), [p_efw1] "+r"(p_efw1),
706 [p_hNl] "+r" (p_hNl) 776 [p_hNl] "+r"(p_hNl)
707 : 777 :
708 : "memory" 778 : "memory");
709 );
710 } 779 }
711 } 780 }
712 781
713 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, 782 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
714 float normal_mu, 783 float normal_mu,
715 float normal_error_threshold, 784 float normal_error_threshold,
716 float x_pow[PART_LEN1], 785 float x_pow[PART_LEN1],
717 float ef[2][PART_LEN1]) { 786 float ef[2][PART_LEN1]) {
718 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; 787 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
719 const float error_threshold = extended_filter_enabled 788 const float error_threshold = extended_filter_enabled
720 ? kExtendedErrorThreshold 789 ? kExtendedErrorThreshold
721 : normal_error_threshold; 790 : normal_error_threshold;
722 int len = (PART_LEN1); 791 int len = (PART_LEN1);
723 float* ef0 = ef[0]; 792 float* ef0 = ef[0];
724 float* ef1 = ef[1]; 793 float* ef1 = ef[1];
725 float fac1 = 1e-10f; 794 float fac1 = 1e-10f;
726 float err_th2 = error_threshold * error_threshold; 795 float err_th2 = error_threshold * error_threshold;
727 float f0, f1, f2; 796 float f0, f1, f2;
728 #if !defined(MIPS32_R2_LE) 797 #if !defined(MIPS32_R2_LE)
729 float f3; 798 float f3;
730 #endif 799 #endif
731 800
732 __asm __volatile ( 801 __asm __volatile(
733 ".set push \n\t" 802 ".set push \n\t"
734 ".set noreorder \n\t" 803 ".set noreorder \n\t"
735 "1: \n\t" 804 "1: \n\t"
736 "lwc1 %[f0], 0(%[x_pow]) \n\t" 805 "lwc1 %[f0], 0(%[x_pow]) \n\t"
737 "lwc1 %[f1], 0(%[ef0]) \n\t" 806 "lwc1 %[f1], 0(%[ef0]) \n\t"
738 "lwc1 %[f2], 0(%[ef1]) \n\t" 807 "lwc1 %[f2], 0(%[ef1]) \n\t"
739 "add.s %[f0], %[f0], %[fac1] \n\t" 808 "add.s %[f0], %[f0], %[fac1] \n\t"
740 "div.s %[f1], %[f1], %[f0] \n\t" 809 "div.s %[f1], %[f1], %[f0] \n\t"
741 "div.s %[f2], %[f2], %[f0] \n\t" 810 "div.s %[f2], %[f2], %[f0] \n\t"
742 "mul.s %[f0], %[f1], %[f1] \n\t" 811 "mul.s %[f0], %[f1], %[f1] \n\t"
743 #if defined(MIPS32_R2_LE) 812 #if defined(MIPS32_R2_LE)
744 "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" 813 "madd.s %[f0], %[f0], %[f2], %[f2] \n\t"
745 #else 814 #else
746 "mul.s %[f3], %[f2], %[f2] \n\t" 815 "mul.s %[f3], %[f2], %[f2] \n\t"
747 "add.s %[f0], %[f0], %[f3] \n\t" 816 "add.s %[f0], %[f0], %[f3] \n\t"
748 #endif 817 #endif
749 "c.le.s %[f0], %[err_th2] \n\t" 818 "c.le.s %[f0], %[err_th2] \n\t"
750 "nop \n\t" 819 "nop \n\t"
751 "bc1t 2f \n\t" 820 "bc1t 2f \n\t"
752 " nop \n\t" 821 " nop \n\t"
753 "sqrt.s %[f0], %[f0] \n\t" 822 "sqrt.s %[f0], %[f0] \n\t"
754 "add.s %[f0], %[f0], %[fac1] \n\t" 823 "add.s %[f0], %[f0], %[fac1] \n\t"
755 "div.s %[f0], %[err_th], %[f0] \n\t" 824 "div.s %[f0], %[err_th], %[f0] \n\t"
756 "mul.s %[f1], %[f1], %[f0] \n\t" 825 "mul.s %[f1], %[f1], %[f0] \n\t"
757 "mul.s %[f2], %[f2], %[f0] \n\t" 826 "mul.s %[f2], %[f2], %[f0] \n\t"
758 "2: \n\t" 827 "2: \n\t"
759 "mul.s %[f1], %[f1], %[mu] \n\t" 828 "mul.s %[f1], %[f1], %[mu] \n\t"
760 "mul.s %[f2], %[f2], %[mu] \n\t" 829 "mul.s %[f2], %[f2], %[mu] \n\t"
761 "swc1 %[f1], 0(%[ef0]) \n\t" 830 "swc1 %[f1], 0(%[ef0]) \n\t"
762 "swc1 %[f2], 0(%[ef1]) \n\t" 831 "swc1 %[f2], 0(%[ef1]) \n\t"
763 "addiu %[len], %[len], -1 \n\t" 832 "addiu %[len], %[len], -1 \n\t"
764 "addiu %[x_pow], %[x_pow], 4 \n\t" 833 "addiu %[x_pow], %[x_pow], 4 \n\t"
765 "addiu %[ef0], %[ef0], 4 \n\t" 834 "addiu %[ef0], %[ef0], 4 \n\t"
766 "bgtz %[len], 1b \n\t" 835 "bgtz %[len], 1b \n\t"
767 " addiu %[ef1], %[ef1], 4 \n\t" 836 " addiu %[ef1], %[ef1], 4 \n\t"
768 ".set pop \n\t" 837 ".set pop \n\t"
769 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), 838 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2),
770 #if !defined(MIPS32_R2_LE) 839 #if !defined(MIPS32_R2_LE)
771 [f3] "=&f" (f3), 840 [f3] "=&f"(f3),
772 #endif 841 #endif
773 [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), 842 [x_pow] "+r"(x_pow), [ef0] "+r"(ef0), [ef1] "+r"(ef1), [len] "+r"(len)
774 [len] "+r" (len) 843 : [fac1] "f"(fac1), [err_th2] "f"(err_th2), [mu] "f"(mu),
775 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), 844 [err_th] "f"(error_threshold)
776 [err_th] "f" (error_threshold) 845 : "memory");
777 : "memory"
778 );
779 } 846 }
780 847
781 void WebRtcAec_InitAec_mips(void) { 848 void WebRtcAec_InitAec_mips(void) {
782 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; 849 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
783 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; 850 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
784 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; 851 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
785 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; 852 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
786 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; 853 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
787 } 854 }
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | webrtc/modules/audio_processing/aec/aec_core_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698