| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 /* | |
| 12 * The core AEC algorithm, which is presented with time-aligned signals. | |
| 13 */ | |
| 14 | |
| 15 #include "webrtc/modules/audio_processing/aec/aec_core.h" | |
| 16 | |
| 17 #include <math.h> | |
| 18 | |
| 19 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" | |
| 20 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" | |
| 21 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" | |
| 22 | |
// Lookup tables defined in another translation unit. Both are read element
// by element in WebRtcAec_OverdriveAndSuppress_mips further down.
| 23 extern const float WebRtcAec_weightCurve[65]; | |
| 24 extern const float WebRtcAec_overDriveCurve[65]; | |
| 25 | |
// MIPS inline-assembly version of the comfort-noise routine; it is installed
// as WebRtcAec_ComfortNoise by WebRtcAec_InitAec_mips.
//
// Steps performed:
//   1. Draw PART_LEN random 16-bit values and turn each into an angle
//      (value * 2*pi / 32768); store cos/sin of the angle in u[1..PART_LEN].
//   2. Scale each u[k] by sqrt(noisePow[k]) and negate its second component.
//   3. Add sqrt(1 - lambda[k]^2) * u[k] to efw[0][k] / efw[1][k].
//   4. Fill comfortNoiseHband from averaged noise and weight values when
//      aec->num_bands > 1, otherwise zero it.
//
// Parameters:
//   aec               - aec->seed is passed by address to the random
//                       generator; aec->num_bands selects the H band path.
//   efw               - two rows of PART_LEN1 floats, updated in place
//                       (presumably real and imaginary parts; confirm
//                       against the caller).
//   comfortNoiseHband - two rows of PART_LEN1 floats, fully overwritten.
//   noisePow          - read at indices 1..PART_LEN.
//   lambda            - read at indices 0..PART_LEN.
| 26 void WebRtcAec_ComfortNoise_mips(AecCore* aec, | |
| 27 float efw[2][PART_LEN1], | |
| 28 float comfortNoiseHband[2][PART_LEN1], | |
| 29 const float* noisePow, | |
| 30 const float* lambda) { | |
| 31 int i, num; | |
| 32 float rand[PART_LEN]; | |
| 33 float noise, noiseAvg, tmp, tmpAvg; | |
| 34 int16_t randW16[PART_LEN]; | |
| 35 complex_t u[PART_LEN1]; | |
| 36 | |
| 37 const float pi2 = 6.28318530717959f; | |
// Factor that maps a raw 16-bit random value directly to an angle.
| 38 const float pi2t = pi2 / 32768; | |
| 39 | |
| 40 // Generate a uniform random array on [0 1] | |
| 41 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); | |
| 42 | |
| 43 int16_t* randWptr = randW16; | |
| 44 float randTemp, randTemp2, randTemp3, randTemp4; | |
| 45 int32_t tmp1s, tmp2s, tmp3s, tmp4s; | |
| 46 | |
// Four random values per pass: the asm loads four int16 values, converts
// them to float, multiplies by pi2t and advances randWptr by 8 bytes.
// The cos/sin evaluation stays in C.
| 47 for (i = 0; i < PART_LEN; i += 4) { | |
| 48 __asm __volatile ( | |
| 49 ".set push \n\t" | |
| 50 ".set noreorder \n\t" | |
| 51 "lh %[tmp1s], 0(%[randWptr]) \n\t" | |
| 52 "lh %[tmp2s], 2(%[randWptr]) \n\t" | |
| 53 "lh %[tmp3s], 4(%[randWptr]) \n\t" | |
| 54 "lh %[tmp4s], 6(%[randWptr]) \n\t" | |
| 55 "mtc1 %[tmp1s], %[randTemp] \n\t" | |
| 56 "mtc1 %[tmp2s], %[randTemp2] \n\t" | |
| 57 "mtc1 %[tmp3s], %[randTemp3] \n\t" | |
| 58 "mtc1 %[tmp4s], %[randTemp4] \n\t" | |
| 59 "cvt.s.w %[randTemp], %[randTemp] \n\t" | |
| 60 "cvt.s.w %[randTemp2], %[randTemp2] \n\t" | |
| 61 "cvt.s.w %[randTemp3], %[randTemp3] \n\t" | |
| 62 "cvt.s.w %[randTemp4], %[randTemp4] \n\t" | |
| 63 "addiu %[randWptr], %[randWptr], 8 \n\t" | |
| 64 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" | |
| 65 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" | |
| 66 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" | |
| 67 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" | |
| 68 ".set pop \n\t" | |
| 69 : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), | |
| 70 [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), | |
| 71 [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), | |
| 72 [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), | |
| 73 [tmp4s] "=&r" (tmp4s) | |
| 74 : [pi2t] "f" (pi2t) | |
| 75 : "memory" | |
| 76 ); | |
| 77 | |
// Entries are written at i + 1 .. i + 4, so u[0] is not touched here; it is
// zeroed below.
| 78 u[i + 1][0] = cosf(randTemp); | |
| 79 u[i + 1][1] = sinf(randTemp); | |
| 80 u[i + 2][0] = cosf(randTemp2); | |
| 81 u[i + 2][1] = sinf(randTemp2); | |
| 82 u[i + 3][0] = cosf(randTemp3); | |
| 83 u[i + 3][1] = sinf(randTemp3); | |
| 84 u[i + 4][0] = cosf(randTemp4); | |
| 85 u[i + 4][1] = sinf(randTemp4); | |
| 86 } | |
| 87 | |
| 88 // Reject LF noise | |
| 89 float* u_ptr = &u[1][0]; | |
| 90 float noise2, noise3, noise4; | |
| 91 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; | |
| 92 | |
| 93 u[0][0] = 0; | |
| 94 u[0][1] = 0; | |
// Four bins per pass. For each bin k the asm computes
//   u[k][0] = u[k][0] * sqrt(noisePow[k])
//   u[k][1] = -(u[k][1] * sqrt(noisePow[k]))
// The noisePow loads use offsets 4..16 because the pointer is one element
// behind the current bin. Both noisePow and u_ptr are advanced by the asm.
| 95 for (i = 1; i < PART_LEN1; i += 4) { | |
| 96 __asm __volatile ( | |
| 97 ".set push \n\t" | |
| 98 ".set noreorder \n\t" | |
| 99 "lwc1 %[noise], 4(%[noisePow]) \n\t" | |
| 100 "lwc1 %[noise2], 8(%[noisePow]) \n\t" | |
| 101 "lwc1 %[noise3], 12(%[noisePow]) \n\t" | |
| 102 "lwc1 %[noise4], 16(%[noisePow]) \n\t" | |
| 103 "sqrt.s %[noise], %[noise] \n\t" | |
| 104 "sqrt.s %[noise2], %[noise2] \n\t" | |
| 105 "sqrt.s %[noise3], %[noise3] \n\t" | |
| 106 "sqrt.s %[noise4], %[noise4] \n\t" | |
| 107 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" | |
| 108 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" | |
| 109 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" | |
| 110 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" | |
| 111 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" | |
| 112 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" | |
| 113 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" | |
| 114 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" | |
| 115 "addiu %[noisePow], %[noisePow], 16 \n\t" | |
| 116 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" | |
| 117 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" | |
| 118 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" | |
| 119 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" | |
| 120 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" | |
| 121 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" | |
| 122 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" | |
| 123 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" | |
| 124 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" | |
| 125 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" | |
| 126 "neg.s %[tmp2f] \n\t" | |
| 127 "neg.s %[tmp4f] \n\t" | |
| 128 "neg.s %[tmp6f] \n\t" | |
| 129 "neg.s %[tmp8f] \n\t" | |
| 130 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" | |
| 131 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" | |
| 132 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" | |
| 133 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" | |
| 134 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" | |
| 135 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" | |
| 136 "addiu %[u_ptr], %[u_ptr], 32 \n\t" | |
| 137 ".set pop \n\t" | |
| 138 : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), | |
| 139 [noise] "=&f" (noise), [noise2] "=&f" (noise2), | |
| 140 [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), | |
| 141 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), | |
| 142 [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), | |
| 143 [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), | |
| 144 [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) | |
| 145 : | |
| 146 : "memory" | |
| 147 ); | |
| 148 } | |
| 149 u[PART_LEN][1] = 0; | |
// Undo the pointer advance made by the asm loop above so that noisePow can
// be indexed from its start again in the H band section.
| 150 noisePow -= PART_LEN; | |
| 151 | |
| 152 u_ptr = &u[0][0]; | |
| 153 float* u_ptr_end = &u[PART_LEN][0]; | |
| 154 float* efw_ptr_0 = &efw[0][0]; | |
| 155 float* efw_ptr_1 = &efw[1][0]; | |
| 156 float tmp9f, tmp10f; | |
| 157 const float tmp1c = 1.0; | |
| 158 | |
// Single asm loop over bins 0..PART_LEN-1, two bins (k, k+1) per pass.
// For every bin whose lambda compares less than 1.0:
//   w = sqrt(1 - lambda^2)
//   efw[0][bin] += w * u[bin][0]
//   efw[1][bin] += w * u[bin][1]
// Bins whose lambda does not compare less than 1.0 are left unchanged.
// Label map: 2 = both bins updated, 3 = only bin k, 4 = bin k skipped and
// bin k+1 tested, 5 = pointer advance and loop test.
// When MIPS32_R2_LE is defined the multiply and add are fused into madd.s.
// NOTE(review): lambda below -1.0 would feed a negative value to sqrt.s,
// whereas the C tail below clamps with WEBRTC_SPL_MAX; confirm the caller
// keeps lambda in range.
| 159 __asm __volatile ( | |
| 160 ".set push \n\t" | |
| 161 ".set noreorder \n\t" | |
| 162 "1: \n\t" | |
| 163 "lwc1 %[tmp1f], 0(%[lambda]) \n\t" | |
| 164 "lwc1 %[tmp6f], 4(%[lambda]) \n\t" | |
| 165 "addiu %[lambda], %[lambda], 8 \n\t" | |
| 166 "c.lt.s %[tmp1f], %[tmp1c] \n\t" | |
| 167 "bc1f 4f \n\t" | |
| 168 " nop \n\t" | |
| 169 "c.lt.s %[tmp6f], %[tmp1c] \n\t" | |
| 170 "bc1f 3f \n\t" | |
| 171 " nop \n\t" | |
| 172 "2: \n\t" | |
| 173 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" | |
| 174 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" | |
| 175 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" | |
| 176 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" | |
| 177 "sqrt.s %[tmp1f], %[tmp1f] \n\t" | |
| 178 "sqrt.s %[tmp6f], %[tmp6f] \n\t" | |
| 179 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | |
| 180 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" | |
| 181 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | |
| 182 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" | |
| 183 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | |
| 184 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" | |
| 185 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | |
| 186 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" | |
| 187 #if !defined(MIPS32_R2_LE) | |
| 188 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" | |
| 189 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" | |
| 190 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" | |
| 191 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" | |
| 192 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" | |
| 193 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" | |
| 194 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" | |
| 195 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" | |
| 196 #else // #if !defined(MIPS32_R2_LE) | |
| 197 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" | |
| 198 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" | |
| 199 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" | |
| 200 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" | |
| 201 #endif // #if !defined(MIPS32_R2_LE) | |
| 202 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | |
| 203 "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | |
| 204 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | |
| 205 "b 5f \n\t" | |
| 206 " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | |
| 207 "3: \n\t" | |
| 208 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" | |
| 209 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" | |
| 210 "sqrt.s %[tmp1f], %[tmp1f] \n\t" | |
| 211 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | |
| 212 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" | |
| 213 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | |
| 214 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" | |
| 215 #if !defined(MIPS32_R2_LE) | |
| 216 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" | |
| 217 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" | |
| 218 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" | |
| 219 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" | |
| 220 #else // #if !defined(MIPS32_R2_LE) | |
| 221 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" | |
| 222 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" | |
| 223 #endif // #if !defined(MIPS32_R2_LE) | |
| 224 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" | |
| 225 "b 5f \n\t" | |
| 226 " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" | |
| 227 "4: \n\t" | |
| 228 "c.lt.s %[tmp6f], %[tmp1c] \n\t" | |
| 229 "bc1f 5f \n\t" | |
| 230 " nop \n\t" | |
| 231 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" | |
| 232 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" | |
| 233 "sqrt.s %[tmp6f], %[tmp6f] \n\t" | |
| 234 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | |
| 235 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" | |
| 236 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | |
| 237 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" | |
| 238 #if !defined(MIPS32_R2_LE) | |
| 239 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" | |
| 240 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" | |
| 241 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" | |
| 242 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" | |
| 243 #else // #if !defined(MIPS32_R2_LE) | |
| 244 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" | |
| 245 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" | |
| 246 #endif // #if !defined(MIPS32_R2_LE) | |
| 247 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" | |
| 248 "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" | |
| 249 "5: \n\t" | |
| 250 "addiu %[u_ptr], %[u_ptr], 16 \n\t" | |
| 251 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" | |
| 252 "bne %[u_ptr], %[u_ptr_end], 1b \n\t" | |
| 253 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" | |
| 254 ".set pop \n\t" | |
| 255 : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), | |
| 256 [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), | |
| 257 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), | |
| 258 [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), | |
| 259 [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), | |
| 260 [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) | |
| 261 : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) | |
| 262 : "memory" | |
| 263 ); | |
| 264 | |
// Restore lambda after the asm advanced it, then handle the last bin
// (index PART_LEN) in plain C, since the asm loop stops before it.
| 265 lambda -= PART_LEN; | |
| 266 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); | |
| 267 // tmp = 1 - lambda[i]; | |
| 268 efw[0][PART_LEN] += tmp * u[PART_LEN][0]; | |
| 269 efw[1][PART_LEN] += tmp * u[PART_LEN][1]; | |
| 270 | |
| 271 // For H band comfort noise | |
| 272 // TODO: don't compute noise and "tmp" twice. Use the previous results. | |
| 273 noiseAvg = 0.0; | |
| 274 tmpAvg = 0.0; | |
| 275 num = 0; | |
| 276 if (aec->num_bands > 1) { | |
// Reuse the random integers drawn at the top, rescaled by 1/32768.
| 277 for (i = 0; i < PART_LEN; i++) { | |
| 278 rand[i] = ((float)randW16[i]) / 32768; | |
| 279 } | |
| 280 | |
| 281 // average noise scale | |
| 282 // average over second half of freq spectrum (i.e., 4->8khz) | |
| 283 // TODO: we shouldn't need num. We know how many elements we're summing. | |
| 284 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { | |
| 285 num++; | |
| 286 noiseAvg += sqrtf(noisePow[i]); | |
| 287 } | |
| 288 noiseAvg /= (float)num; | |
| 289 | |
| 290 // average nlp scale | |
| 291 // average over second half of freq spectrum (i.e., 4->8khz) | |
| 292 // TODO: we shouldn't need num. We know how many elements we're summing. | |
| 293 num = 0; | |
| 294 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { | |
| 295 num++; | |
| 296 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); | |
| 297 } | |
| 298 tmpAvg /= (float)num; | |
| 299 | |
| 300 // Use average noise for H band | |
| 301 // TODO: we should probably have a new random vector here. | |
| 302 // Reject LF noise | |
| 303 u[0][0] = 0; | |
| 304 u[0][1] = 0; | |
// u is rebuilt from scratch here; the values written by the asm sections
// above have already been consumed.
| 305 for (i = 1; i < PART_LEN1; i++) { | |
| 306 tmp = pi2 * rand[i - 1]; | |
| 307 | |
| 308 // Use average noise for H band | |
| 309 u[i][0] = noiseAvg * (float)cos(tmp); | |
| 310 u[i][1] = -noiseAvg * (float)sin(tmp); | |
| 311 } | |
| 312 u[PART_LEN][1] = 0; | |
| 313 | |
| 314 for (i = 0; i < PART_LEN1; i++) { | |
| 315 // Use average NLP weight for H band | |
| 316 comfortNoiseHband[0][i] = tmpAvg * u[i][0]; | |
| 317 comfortNoiseHband[1][i] = tmpAvg * u[i][1]; | |
| 318 } | |
| 319 } else { | |
// Single-band case: clear both rows of the output.
| 320 memset(comfortNoiseHband, 0, | |
| 321 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); | |
| 322 } | |
| 323 } | |
| 324 | |
// MIPS inline-assembly version of the far-end filter; it is installed as
// WebRtcAec_FilterFar by WebRtcAec_InitAec_mips.
//
// For every partition i and every bin k in 0..PART_LEN1-1 it accumulates a
// complex product into y_fft, with a = x_fft_buf at the wrapped position and
// b = h_fft_buf at partition i:
//   y_fft[0][k] += aRe * bRe - aIm * bIm
//   y_fft[1][k] += aRe * bIm + aIm * bRe
//
// Parameters:
//   num_partitions      - number of partitions to process.
//   x_fft_buf_block_pos - starting partition in x_fft_buf; i is added to it
//                         and the sum wraps at num_partitions.
//   x_fft_buf, h_fft_buf - row 0 and row 1 are read as the Re and Im
//                         operands (per the local names aRe/aIm/bRe/bIm).
//   y_fft               - accumulator, updated in place; not cleared here.
| 325 void WebRtcAec_FilterFar_mips( | |
| 326 int num_partitions, | |
| 327 int x_fft_buf_block_pos, | |
| 328 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | |
| 329 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | |
| 330 float y_fft[2][PART_LEN1]) { | |
| 331 int i; | |
| 332 for (i = 0; i < num_partitions; i++) { | |
| 333 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; | |
| 334 int pos = i * PART_LEN1; | |
| 335 // Check for wrap | |
| 336 if (i + x_fft_buf_block_pos >= num_partitions) { | |
| 337 xPos -= num_partitions * (PART_LEN1); | |
| 338 } | |
| 339 float* yf0 = y_fft[0]; | |
| 340 float* yf1 = y_fft[1]; | |
| 341 float* aRe = x_fft_buf[0] + xPos; | |
| 342 float* aIm = x_fft_buf[1] + xPos; | |
| 343 float* bRe = h_fft_buf[0] + pos; | |
| 344 float* bIm = h_fft_buf[1] + pos; | |
// NOTE(review): f10 and f13 are bound as asm outputs below but never appear
// in the instruction text.
| 345 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; | |
// The main asm loop handles two bins per pass, so it runs PART_LEN1 >> 1
// times; the code after the bgtz handles one remaining bin.
| 346 int len = PART_LEN1 >> 1; | |
| 347 | |
| 348 __asm __volatile ( | |
| 349 ".set push \n\t" | |
| 350 ".set noreorder \n\t" | |
| 351 "1: \n\t" | |
| 352 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 353 "lwc1 %[f1], 0(%[bRe]) \n\t" | |
| 354 "lwc1 %[f2], 0(%[bIm]) \n\t" | |
| 355 "lwc1 %[f3], 0(%[aIm]) \n\t" | |
| 356 "lwc1 %[f4], 4(%[aRe]) \n\t" | |
| 357 "lwc1 %[f5], 4(%[bRe]) \n\t" | |
| 358 "lwc1 %[f6], 4(%[bIm]) \n\t" | |
| 359 "mul.s %[f8], %[f0], %[f1] \n\t" | |
| 360 "mul.s %[f0], %[f0], %[f2] \n\t" | |
| 361 "mul.s %[f9], %[f4], %[f5] \n\t" | |
| 362 "mul.s %[f4], %[f4], %[f6] \n\t" | |
| 363 "lwc1 %[f7], 4(%[aIm]) \n\t" | |
// Without MIPS32_R2_LE the cross terms use separate mul.s / sub.s / add.s;
// with it they are fused into nmsub.s and madd.s.
| 364 #if !defined(MIPS32_R2_LE) | |
| 365 "mul.s %[f12], %[f2], %[f3] \n\t" | |
| 366 "mul.s %[f1], %[f3], %[f1] \n\t" | |
| 367 "mul.s %[f11], %[f6], %[f7] \n\t" | |
| 368 "addiu %[aRe], %[aRe], 8 \n\t" | |
| 369 "addiu %[aIm], %[aIm], 8 \n\t" | |
| 370 "addiu %[len], %[len], -1 \n\t" | |
| 371 "sub.s %[f8], %[f8], %[f12] \n\t" | |
| 372 "mul.s %[f12], %[f7], %[f5] \n\t" | |
| 373 "lwc1 %[f2], 0(%[yf0]) \n\t" | |
| 374 "add.s %[f1], %[f0], %[f1] \n\t" | |
| 375 "lwc1 %[f3], 0(%[yf1]) \n\t" | |
| 376 "sub.s %[f9], %[f9], %[f11] \n\t" | |
| 377 "lwc1 %[f6], 4(%[yf0]) \n\t" | |
| 378 "add.s %[f4], %[f4], %[f12] \n\t" | |
| 379 #else // #if !defined(MIPS32_R2_LE) | |
| 380 "addiu %[aRe], %[aRe], 8 \n\t" | |
| 381 "addiu %[aIm], %[aIm], 8 \n\t" | |
| 382 "addiu %[len], %[len], -1 \n\t" | |
| 383 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" | |
| 384 "lwc1 %[f2], 0(%[yf0]) \n\t" | |
| 385 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" | |
| 386 "lwc1 %[f3], 0(%[yf1]) \n\t" | |
| 387 "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" | |
| 388 "lwc1 %[f6], 4(%[yf0]) \n\t" | |
| 389 "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" | |
| 390 #endif // #if !defined(MIPS32_R2_LE) | |
| 391 "lwc1 %[f5], 4(%[yf1]) \n\t" | |
| 392 "add.s %[f2], %[f2], %[f8] \n\t" | |
| 393 "addiu %[bRe], %[bRe], 8 \n\t" | |
| 394 "addiu %[bIm], %[bIm], 8 \n\t" | |
| 395 "add.s %[f3], %[f3], %[f1] \n\t" | |
| 396 "add.s %[f6], %[f6], %[f9] \n\t" | |
| 397 "add.s %[f5], %[f5], %[f4] \n\t" | |
| 398 "swc1 %[f2], 0(%[yf0]) \n\t" | |
| 399 "swc1 %[f3], 0(%[yf1]) \n\t" | |
| 400 "swc1 %[f6], 4(%[yf0]) \n\t" | |
| 401 "swc1 %[f5], 4(%[yf1]) \n\t" | |
| 402 "addiu %[yf0], %[yf0], 8 \n\t" | |
| 403 "bgtz %[len], 1b \n\t" | |
| 404 " addiu %[yf1], %[yf1], 8 \n\t" | |
// Tail: one more bin, same arithmetic as above but without unrolling.
| 405 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 406 "lwc1 %[f1], 0(%[bRe]) \n\t" | |
| 407 "lwc1 %[f2], 0(%[bIm]) \n\t" | |
| 408 "lwc1 %[f3], 0(%[aIm]) \n\t" | |
| 409 "mul.s %[f8], %[f0], %[f1] \n\t" | |
| 410 "mul.s %[f0], %[f0], %[f2] \n\t" | |
| 411 #if !defined(MIPS32_R2_LE) | |
| 412 "mul.s %[f12], %[f2], %[f3] \n\t" | |
| 413 "mul.s %[f1], %[f3], %[f1] \n\t" | |
| 414 "sub.s %[f8], %[f8], %[f12] \n\t" | |
| 415 "lwc1 %[f2], 0(%[yf0]) \n\t" | |
| 416 "add.s %[f1], %[f0], %[f1] \n\t" | |
| 417 "lwc1 %[f3], 0(%[yf1]) \n\t" | |
| 418 #else // #if !defined(MIPS32_R2_LE) | |
| 419 "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" | |
| 420 "lwc1 %[f2], 0(%[yf0]) \n\t" | |
| 421 "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" | |
| 422 "lwc1 %[f3], 0(%[yf1]) \n\t" | |
| 423 #endif // #if !defined(MIPS32_R2_LE) | |
| 424 "add.s %[f2], %[f2], %[f8] \n\t" | |
| 425 "add.s %[f3], %[f3], %[f1] \n\t" | |
| 426 "swc1 %[f2], 0(%[yf0]) \n\t" | |
| 427 "swc1 %[f3], 0(%[yf1]) \n\t" | |
| 428 ".set pop \n\t" | |
| 429 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | |
| 430 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | |
| 431 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), | |
| 432 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), | |
| 433 [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), | |
| 434 [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), | |
| 435 [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) | |
| 436 : | |
| 437 : "memory" | |
| 438 ); | |
| 439 } | |
| 440 } | |
| 441 | |
// MIPS inline-assembly version of the filter adaptation step; it is
// installed as WebRtcAec_FilterAdaptation by WebRtcAec_InitAec_mips.
//
// For every partition i:
//   1. Build an interleaved buffer fft from a = x_fft_buf (wrapped position)
//      and b = e_fft:
//        fft[2k]     = aRe * bRe + aIm * bIm
//        fft[2k + 1] = aRe * bIm - aIm * bRe
//      then overwrite fft[1] with the first expression evaluated for the bin
//      that follows the loop.
//   2. Call aec_rdft_inverse_128, zero PART_LEN floats starting at
//      fft + PART_LEN, scale the leading floats by 2 / PART_LEN2, and call
//      aec_rdft_forward_128.
//   3. Add the result into h_fft_buf at partition i.
//
// The second and third asm blocks hard-code their trip counts (8 and 31)
// and a byte offset of 256; presumably this matches PART_LEN == 64, to be
// confirmed against the header that defines PART_LEN.
//
// Parameters:
//   num_partitions      - number of partitions to update.
//   x_fft_buf_block_pos - starting partition in x_fft_buf; wraps at
//                         num_partitions.
//   x_fft_buf, e_fft    - read only by this routine.
//   h_fft_buf           - updated in place.
| 442 void WebRtcAec_FilterAdaptation_mips( | |
| 443 int num_partitions, | |
| 444 int x_fft_buf_block_pos, | |
| 445 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | |
| 446 float e_fft[2][PART_LEN1], | |
| 447 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { | |
| 448 float fft[PART_LEN2]; | |
| 449 int i; | |
| 450 for (i = 0; i < num_partitions; i++) { | |
| 451 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); | |
| 452 int pos; | |
| 453 // Check for wrap | |
| 454 if (i + x_fft_buf_block_pos >= num_partitions) { | |
| 455 xPos -= num_partitions * PART_LEN1; | |
| 456 } | |
| 457 | |
| 458 pos = i * PART_LEN1; | |
| 459 float* aRe = x_fft_buf[0] + xPos; | |
| 460 float* aIm = x_fft_buf[1] + xPos; | |
| 461 float* bRe = e_fft[0]; | |
| 462 float* bIm = e_fft[1]; | |
| 463 float* fft_tmp; | |
| 464 | |
// NOTE(review): f12 is bound as an asm output below but never appears in
// the instruction text.
| 465 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12; | |
// Two bins per pass of the first asm loop.
| 466 int len = PART_LEN >> 1; | |
| 467 | |
| 468 __asm __volatile ( | |
| 469 ".set push \n\t" | |
| 470 ".set noreorder \n\t" | |
| 471 "addiu %[fft_tmp], %[fft], 0 \n\t" | |
| 472 "1: \n\t" | |
| 473 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 474 "lwc1 %[f1], 0(%[bRe]) \n\t" | |
| 475 "lwc1 %[f2], 0(%[bIm]) \n\t" | |
| 476 "lwc1 %[f4], 4(%[aRe]) \n\t" | |
| 477 "lwc1 %[f5], 4(%[bRe]) \n\t" | |
| 478 "lwc1 %[f6], 4(%[bIm]) \n\t" | |
| 479 "addiu %[aRe], %[aRe], 8 \n\t" | |
| 480 "addiu %[bRe], %[bRe], 8 \n\t" | |
| 481 "mul.s %[f8], %[f0], %[f1] \n\t" | |
| 482 "mul.s %[f0], %[f0], %[f2] \n\t" | |
| 483 "lwc1 %[f3], 0(%[aIm]) \n\t" | |
| 484 "mul.s %[f9], %[f4], %[f5] \n\t" | |
| 485 "lwc1 %[f7], 4(%[aIm]) \n\t" | |
| 486 "mul.s %[f4], %[f4], %[f6] \n\t" | |
| 487 #if !defined(MIPS32_R2_LE) | |
| 488 "mul.s %[f10], %[f3], %[f2] \n\t" | |
| 489 "mul.s %[f1], %[f3], %[f1] \n\t" | |
| 490 "mul.s %[f11], %[f7], %[f6] \n\t" | |
| 491 "mul.s %[f5], %[f7], %[f5] \n\t" | |
| 492 "addiu %[aIm], %[aIm], 8 \n\t" | |
| 493 "addiu %[bIm], %[bIm], 8 \n\t" | |
| 494 "addiu %[len], %[len], -1 \n\t" | |
| 495 "add.s %[f8], %[f8], %[f10] \n\t" | |
| 496 "sub.s %[f1], %[f0], %[f1] \n\t" | |
| 497 "add.s %[f9], %[f9], %[f11] \n\t" | |
| 498 "sub.s %[f5], %[f4], %[f5] \n\t" | |
| 499 #else // #if !defined(MIPS32_R2_LE) | |
| 500 "addiu %[aIm], %[aIm], 8 \n\t" | |
| 501 "addiu %[bIm], %[bIm], 8 \n\t" | |
| 502 "addiu %[len], %[len], -1 \n\t" | |
| 503 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" | |
| 504 "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" | |
| 505 "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" | |
| 506 "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" | |
| 507 #endif // #if !defined(MIPS32_R2_LE) | |
| 508 "swc1 %[f8], 0(%[fft_tmp]) \n\t" | |
| 509 "swc1 %[f1], 4(%[fft_tmp]) \n\t" | |
| 510 "swc1 %[f9], 8(%[fft_tmp]) \n\t" | |
| 511 "swc1 %[f5], 12(%[fft_tmp]) \n\t" | |
| 512 "bgtz %[len], 1b \n\t" | |
| 513 " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | |
// Tail: the bin after the loop contributes only aRe * bRe + aIm * bIm, and
// the result replaces fft[1] (byte offset 4 from the start of fft).
| 514 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 515 "lwc1 %[f1], 0(%[bRe]) \n\t" | |
| 516 "lwc1 %[f2], 0(%[bIm]) \n\t" | |
| 517 "lwc1 %[f3], 0(%[aIm]) \n\t" | |
| 518 "mul.s %[f8], %[f0], %[f1] \n\t" | |
| 519 #if !defined(MIPS32_R2_LE) | |
| 520 "mul.s %[f10], %[f3], %[f2] \n\t" | |
| 521 "add.s %[f8], %[f8], %[f10] \n\t" | |
| 522 #else // #if !defined(MIPS32_R2_LE) | |
| 523 "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" | |
| 524 #endif // #if !defined(MIPS32_R2_LE) | |
| 525 "swc1 %[f8], 4(%[fft]) \n\t" | |
| 526 ".set pop \n\t" | |
| 527 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | |
| 528 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | |
| 529 [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), | |
| 530 [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), | |
| 531 [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), | |
| 532 [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), | |
| 533 [len] "+r" (len) | |
| 534 : [fft] "r" (fft) | |
| 535 : "memory" | |
| 536 ); | |
| 537 | |
| 538 aec_rdft_inverse_128(fft); | |
// Zero PART_LEN floats starting at fft + PART_LEN before transforming back.
| 539 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); | |
| 540 | |
| 541 // fft scaling | |
| 542 { | |
| 543 float scale = 2.0f / PART_LEN2; | |
// 8 passes of 8 floats each: the first 64 floats of fft are multiplied by
// scale. len is reloaded inside the asm, so its earlier value is not used.
| 544 __asm __volatile ( | |
| 545 ".set push \n\t" | |
| 546 ".set noreorder \n\t" | |
| 547 "addiu %[fft_tmp], %[fft], 0 \n\t" | |
| 548 "addiu %[len], $zero, 8 \n\t" | |
| 549 "1: \n\t" | |
| 550 "addiu %[len], %[len], -1 \n\t" | |
| 551 "lwc1 %[f0], 0(%[fft_tmp]) \n\t" | |
| 552 "lwc1 %[f1], 4(%[fft_tmp]) \n\t" | |
| 553 "lwc1 %[f2], 8(%[fft_tmp]) \n\t" | |
| 554 "lwc1 %[f3], 12(%[fft_tmp]) \n\t" | |
| 555 "mul.s %[f0], %[f0], %[scale] \n\t" | |
| 556 "mul.s %[f1], %[f1], %[scale] \n\t" | |
| 557 "mul.s %[f2], %[f2], %[scale] \n\t" | |
| 558 "mul.s %[f3], %[f3], %[scale] \n\t" | |
| 559 "lwc1 %[f4], 16(%[fft_tmp]) \n\t" | |
| 560 "lwc1 %[f5], 20(%[fft_tmp]) \n\t" | |
| 561 "lwc1 %[f6], 24(%[fft_tmp]) \n\t" | |
| 562 "lwc1 %[f7], 28(%[fft_tmp]) \n\t" | |
| 563 "mul.s %[f4], %[f4], %[scale] \n\t" | |
| 564 "mul.s %[f5], %[f5], %[scale] \n\t" | |
| 565 "mul.s %[f6], %[f6], %[scale] \n\t" | |
| 566 "mul.s %[f7], %[f7], %[scale] \n\t" | |
| 567 "swc1 %[f0], 0(%[fft_tmp]) \n\t" | |
| 568 "swc1 %[f1], 4(%[fft_tmp]) \n\t" | |
| 569 "swc1 %[f2], 8(%[fft_tmp]) \n\t" | |
| 570 "swc1 %[f3], 12(%[fft_tmp]) \n\t" | |
| 571 "swc1 %[f4], 16(%[fft_tmp]) \n\t" | |
| 572 "swc1 %[f5], 20(%[fft_tmp]) \n\t" | |
| 573 "swc1 %[f6], 24(%[fft_tmp]) \n\t" | |
| 574 "swc1 %[f7], 28(%[fft_tmp]) \n\t" | |
| 575 "bgtz %[len], 1b \n\t" | |
| 576 " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" | |
| 577 ".set pop \n\t" | |
| 578 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | |
| 579 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | |
| 580 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), | |
| 581 [fft_tmp] "=&r" (fft_tmp) | |
| 582 : [scale] "f" (scale), [fft] "r" (fft) | |
| 583 : "memory" | |
| 584 ); | |
| 585 } | |
| 586 aec_rdft_forward_128(fft); | |
// aRe and aIm are reused from here on as pointers into h_fft_buf.
| 587 aRe = h_fft_buf[0] + pos; | |
| 588 aIm = h_fft_buf[1] + pos; | |
// Accumulate fft into h_fft_buf. The prologue before label 1 handles the
// packed first pair and bin 1:
//   aRe[0]  += fft[0]
//   aRe[64] += fft[1]   (byte offset 256 from aRe)
//   aRe[1]  += fft[2]
//   aIm[1]  += fft[3]
// The loop then runs 31 passes of two bins each, adding fft[2k] to the
// first row and fft[2k + 1] to the second row.
| 589 __asm __volatile ( | |
| 590 ".set push \n\t" | |
| 591 ".set noreorder \n\t" | |
| 592 "addiu %[fft_tmp], %[fft], 0 \n\t" | |
| 593 "addiu %[len], $zero, 31 \n\t" | |
| 594 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 595 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" | |
| 596 "lwc1 %[f2], 256(%[aRe]) \n\t" | |
| 597 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" | |
| 598 "lwc1 %[f4], 4(%[aRe]) \n\t" | |
| 599 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" | |
| 600 "lwc1 %[f6], 4(%[aIm]) \n\t" | |
| 601 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" | |
| 602 "add.s %[f0], %[f0], %[f1] \n\t" | |
| 603 "add.s %[f2], %[f2], %[f3] \n\t" | |
| 604 "add.s %[f4], %[f4], %[f5] \n\t" | |
| 605 "add.s %[f6], %[f6], %[f7] \n\t" | |
| 606 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | |
| 607 "swc1 %[f0], 0(%[aRe]) \n\t" | |
| 608 "swc1 %[f2], 256(%[aRe]) \n\t" | |
| 609 "swc1 %[f4], 4(%[aRe]) \n\t" | |
| 610 "addiu %[aRe], %[aRe], 8 \n\t" | |
| 611 "swc1 %[f6], 4(%[aIm]) \n\t" | |
| 612 "addiu %[aIm], %[aIm], 8 \n\t" | |
| 613 "1: \n\t" | |
| 614 "lwc1 %[f0], 0(%[aRe]) \n\t" | |
| 615 "lwc1 %[f1], 0(%[fft_tmp]) \n\t" | |
| 616 "lwc1 %[f2], 0(%[aIm]) \n\t" | |
| 617 "lwc1 %[f3], 4(%[fft_tmp]) \n\t" | |
| 618 "lwc1 %[f4], 4(%[aRe]) \n\t" | |
| 619 "lwc1 %[f5], 8(%[fft_tmp]) \n\t" | |
| 620 "lwc1 %[f6], 4(%[aIm]) \n\t" | |
| 621 "lwc1 %[f7], 12(%[fft_tmp]) \n\t" | |
| 622 "add.s %[f0], %[f0], %[f1] \n\t" | |
| 623 "add.s %[f2], %[f2], %[f3] \n\t" | |
| 624 "add.s %[f4], %[f4], %[f5] \n\t" | |
| 625 "add.s %[f6], %[f6], %[f7] \n\t" | |
| 626 "addiu %[len], %[len], -1 \n\t" | |
| 627 "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" | |
| 628 "swc1 %[f0], 0(%[aRe]) \n\t" | |
| 629 "swc1 %[f2], 0(%[aIm]) \n\t" | |
| 630 "swc1 %[f4], 4(%[aRe]) \n\t" | |
| 631 "addiu %[aRe], %[aRe], 8 \n\t" | |
| 632 "swc1 %[f6], 4(%[aIm]) \n\t" | |
| 633 "bgtz %[len], 1b \n\t" | |
| 634 " addiu %[aIm], %[aIm], 8 \n\t" | |
| 635 ".set pop \n\t" | |
| 636 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | |
| 637 [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), | |
| 638 [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), | |
| 639 [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) | |
| 640 : [fft] "r" (fft) | |
| 641 : "memory" | |
| 642 ); | |
| 643 } | |
| 644 } | |
| 645 | |
// MIPS inline-assembly version of the overdrive and suppression step; it is
// installed as WebRtcAec_OverdriveAndSuppress by WebRtcAec_InitAec_mips.
//
// For each bin i in 0..PART_LEN1-1:
//   1. If hNlFb < hNl[i], replace hNl[i] with
//        w * hNlFb + (1 - w) * hNl[i],  where w = WebRtcAec_weightCurve[i].
//   2. hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]).
//   3. efw[0][i] = efw[0][i] * hNl[i];  efw[1][i] = -(efw[1][i] * hNl[i]).
//
// Parameters:
//   aec   - only aec->overDriveSm is read here.
//   hNl   - per-bin gains, updated in place.
//   hNlFb - scalar compared against each hNl[i] in step 1.
//   efw   - two rows of PART_LEN1 floats, updated in place; note the sign
//           flip applied to the second row.
| 646 void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, | |
| 647 float hNl[PART_LEN1], | |
| 648 const float hNlFb, | |
| 649 float efw[2][PART_LEN1]) { | |
| 650 int i; | |
| 651 const float one = 1.0; | |
| 652 float* p_hNl; | |
| 653 float* p_efw0; | |
| 654 float* p_efw1; | |
| 655 float* p_WebRtcAec_wC; | |
| 656 float temp1, temp2, temp3, temp4; | |
| 657 | |
| 658 p_hNl = &hNl[0]; | |
| 659 p_efw0 = &efw[0][0]; | |
| 660 p_efw1 = &efw[1][0]; | |
// The cast drops the const qualifier of the table; the asm only loads
// through this pointer.
| 661 p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; | |
| 662 | |
| 663 for (i = 0; i < PART_LEN1; i++) { | |
| 664 // Weight subbands | |
// p_hNl is an input-only operand in this first asm block; it is advanced by
// the second block below. Only the weight pointer moves here.
// The mul.s sits in the branch delay slot of bc1f, so temp3 is computed on
// both paths.
| 665 __asm __volatile ( | |
| 666 ".set push \n\t" | |
| 667 ".set noreorder \n\t" | |
| 668 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" | |
| 669 "lwc1 %[temp2], 0(%[p_wC]) \n\t" | |
| 670 "c.lt.s %[hNlFb], %[temp1] \n\t" | |
| 671 "bc1f 1f \n\t" | |
| 672 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" | |
| 673 "sub.s %[temp4], %[one], %[temp2] \n\t" | |
| 674 #if !defined(MIPS32_R2_LE) | |
| 675 "mul.s %[temp1], %[temp1], %[temp4] \n\t" | |
| 676 "add.s %[temp1], %[temp3], %[temp1] \n\t" | |
| 677 #else // #if !defined(MIPS32_R2_LE) | |
| 678 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" | |
| 679 #endif // #if !defined(MIPS32_R2_LE) | |
| 680 "swc1 %[temp1], 0(%[p_hNl]) \n\t" | |
| 681 "1: \n\t" | |
| 682 "addiu %[p_wC], %[p_wC], 4 \n\t" | |
| 683 ".set pop \n\t" | |
| 684 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), | |
| 685 [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) | |
| 686 : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) | |
| 687 : "memory" | |
| 688 ); | |
| 689 | |
// Overdrive: raise the (possibly re-weighted) gain to a per-bin exponent.
| 690 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | |
| 691 | |
// Suppress: scale both rows of efw by the new gain, negate the second row,
// and advance p_hNl, p_efw0 and p_efw1 by one float each. This block has no
// .set noreorder wrapper and contains no branches.
| 692 __asm __volatile ( | |
| 693 "lwc1 %[temp1], 0(%[p_hNl]) \n\t" | |
| 694 "lwc1 %[temp3], 0(%[p_efw1]) \n\t" | |
| 695 "lwc1 %[temp2], 0(%[p_efw0]) \n\t" | |
| 696 "addiu %[p_hNl], %[p_hNl], 4 \n\t" | |
| 697 "mul.s %[temp3], %[temp3], %[temp1] \n\t" | |
| 698 "mul.s %[temp2], %[temp2], %[temp1] \n\t" | |
| 699 "addiu %[p_efw0], %[p_efw0], 4 \n\t" | |
| 700 "addiu %[p_efw1], %[p_efw1], 4 \n\t" | |
| 701 "neg.s %[temp4], %[temp3] \n\t" | |
| 702 "swc1 %[temp2], -4(%[p_efw0]) \n\t" | |
| 703 "swc1 %[temp4], -4(%[p_efw1]) \n\t" | |
| 704 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), | |
| 705 [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), | |
| 706 [p_hNl] "+r" (p_hNl) | |
| 707 : | |
| 708 : "memory" | |
| 709 ); | |
| 710 } | |
| 711 } | |
| 712 | |
// MIPS inline-assembly version of the error-signal scaling step; it is
// installed as WebRtcAec_ScaleErrorSignal by WebRtcAec_InitAec_mips.
//
// For each bin k in 0..PART_LEN1-1:
//   ef[0][k] /= x_pow[k] + 1e-10
//   ef[1][k] /= x_pow[k] + 1e-10
//   m2 = ef[0][k]^2 + ef[1][k]^2
//   if m2 is not <= error_threshold^2:
//       both components are multiplied by
//       error_threshold / (sqrt(m2) + 1e-10)
//   both components are multiplied by mu
//
// Parameters:
//   extended_filter_enabled - when non-zero, kExtendedMu and
//                             kExtendedErrorThreshold are used instead of
//                             the two normal_* arguments.
//   normal_mu               - step size used otherwise.
//   normal_error_threshold  - magnitude limit used otherwise.
//   x_pow                   - read only; PART_LEN1 entries.
//   ef                      - two rows of PART_LEN1 floats, updated in place.
| 713 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, | |
| 714 float normal_mu, | |
| 715 float normal_error_threshold, | |
| 716 float x_pow[PART_LEN1], | |
| 717 float ef[2][PART_LEN1]) { | |
| 718 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; | |
| 719 const float error_threshold = extended_filter_enabled | |
| 720 ? kExtendedErrorThreshold | |
| 721 : normal_error_threshold; | |
| 722 int len = (PART_LEN1); | |
| 723 float* ef0 = ef[0]; | |
| 724 float* ef1 = ef[1]; | |
// Small constant added to denominators to avoid division by zero.
| 725 float fac1 = 1e-10f; | |
// The comparison is done on squared magnitudes so that the square root is
// only taken for bins that exceed the threshold.
| 726 float err_th2 = error_threshold * error_threshold; | |
| 727 float f0, f1, f2; | |
// f3 is only needed as a scratch register when madd.s is not available.
| 728 #if !defined(MIPS32_R2_LE) | |
| 729 float f3; | |
| 730 #endif | |
| 731 | |
| 732 __asm __volatile ( | |
| 733 ".set push \n\t" | |
| 734 ".set noreorder \n\t" | |
| 735 "1: \n\t" | |
| 736 "lwc1 %[f0], 0(%[x_pow]) \n\t" | |
| 737 "lwc1 %[f1], 0(%[ef0]) \n\t" | |
| 738 "lwc1 %[f2], 0(%[ef1]) \n\t" | |
| 739 "add.s %[f0], %[f0], %[fac1] \n\t" | |
| 740 "div.s %[f1], %[f1], %[f0] \n\t" | |
| 741 "div.s %[f2], %[f2], %[f0] \n\t" | |
| 742 "mul.s %[f0], %[f1], %[f1] \n\t" | |
| 743 #if defined(MIPS32_R2_LE) | |
| 744 "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" | |
| 745 #else | |
| 746 "mul.s %[f3], %[f2], %[f2] \n\t" | |
| 747 "add.s %[f0], %[f0], %[f3] \n\t" | |
| 748 #endif | |
// NOTE(review): the nop between c.le.s and bc1t is presumably a hazard slot
// between the FP compare and the branch; confirm against the target ISA.
| 749 "c.le.s %[f0], %[err_th2] \n\t" | |
| 750 "nop \n\t" | |
| 751 "bc1t 2f \n\t" | |
| 752 " nop \n\t" | |
| 753 "sqrt.s %[f0], %[f0] \n\t" | |
| 754 "add.s %[f0], %[f0], %[fac1] \n\t" | |
| 755 "div.s %[f0], %[err_th], %[f0] \n\t" | |
| 756 "mul.s %[f1], %[f1], %[f0] \n\t" | |
| 757 "mul.s %[f2], %[f2], %[f0] \n\t" | |
| 758 "2: \n\t" | |
| 759 "mul.s %[f1], %[f1], %[mu] \n\t" | |
| 760 "mul.s %[f2], %[f2], %[mu] \n\t" | |
| 761 "swc1 %[f1], 0(%[ef0]) \n\t" | |
| 762 "swc1 %[f2], 0(%[ef1]) \n\t" | |
| 763 "addiu %[len], %[len], -1 \n\t" | |
| 764 "addiu %[x_pow], %[x_pow], 4 \n\t" | |
| 765 "addiu %[ef0], %[ef0], 4 \n\t" | |
| 766 "bgtz %[len], 1b \n\t" | |
| 767 " addiu %[ef1], %[ef1], 4 \n\t" | |
| 768 ".set pop \n\t" | |
| 769 : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), | |
| 770 #if !defined(MIPS32_R2_LE) | |
| 771 [f3] "=&f" (f3), | |
| 772 #endif | |
| 773 [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), | |
| 774 [len] "+r" (len) | |
| 775 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), | |
| 776 [err_th] "f" (error_threshold) | |
| 777 : "memory" | |
| 778 ); | |
| 779 } | |
| 780 | |
// Points the five AEC function hooks at the MIPS implementations defined in
// this file. The hook variables themselves are declared elsewhere.
| 781 void WebRtcAec_InitAec_mips(void) { | |
| 782 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; | |
| 783 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; | |
| 784 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; | |
| 785 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; | |
| 786 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; | |
| 787 } | |
| OLD | NEW |