Index: webrtc/modules/audio_processing/aec/aec_core_mips.cc |
diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.cc b/webrtc/modules/audio_processing/aec/aec_core_mips.cc |
index c5c9b0b044618d1e6e7cd73a6f07cbacf33959df..b3fccb0ca3a7808cb97a21a85227751abd1f3c9a 100644 |
--- a/webrtc/modules/audio_processing/aec/aec_core_mips.cc |
+++ b/webrtc/modules/audio_processing/aec/aec_core_mips.cc |
@@ -27,304 +27,6 @@ namespace webrtc { |
extern const float WebRtcAec_weightCurve[65]; |
extern const float WebRtcAec_overDriveCurve[65]; |
-void WebRtcAec_ComfortNoise_mips(AecCore* aec, |
- float efw[2][PART_LEN1], |
- float comfortNoiseHband[2][PART_LEN1], |
- const float* noisePow, |
- const float* lambda) { |
- int i, num; |
- float rand[PART_LEN]; |
- float noise, noiseAvg, tmp, tmpAvg; |
- int16_t randW16[PART_LEN]; |
- complex_t u[PART_LEN1]; |
- |
- const float pi2 = 6.28318530717959f; |
- const float pi2t = pi2 / 32768; |
- |
- // Generate a uniform random array on [0 1] |
- WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); |
- |
- int16_t* randWptr = randW16; |
- float randTemp, randTemp2, randTemp3, randTemp4; |
- int32_t tmp1s, tmp2s, tmp3s, tmp4s; |
- |
- for (i = 0; i < PART_LEN; i += 4) { |
- __asm __volatile( |
- ".set push \n\t" |
- ".set noreorder \n\t" |
- "lh %[tmp1s], 0(%[randWptr]) \n\t" |
- "lh %[tmp2s], 2(%[randWptr]) \n\t" |
- "lh %[tmp3s], 4(%[randWptr]) \n\t" |
- "lh %[tmp4s], 6(%[randWptr]) \n\t" |
- "mtc1 %[tmp1s], %[randTemp] \n\t" |
- "mtc1 %[tmp2s], %[randTemp2] \n\t" |
- "mtc1 %[tmp3s], %[randTemp3] \n\t" |
- "mtc1 %[tmp4s], %[randTemp4] \n\t" |
- "cvt.s.w %[randTemp], %[randTemp] \n\t" |
- "cvt.s.w %[randTemp2], %[randTemp2] \n\t" |
- "cvt.s.w %[randTemp3], %[randTemp3] \n\t" |
- "cvt.s.w %[randTemp4], %[randTemp4] \n\t" |
- "addiu %[randWptr], %[randWptr], 8 \n\t" |
- "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" |
- "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" |
- "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" |
- "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" |
- ".set pop \n\t" |
- : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), |
- [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), |
- [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), |
- [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), |
- [tmp4s] "=&r" (tmp4s) |
- : [pi2t] "f" (pi2t) |
- : "memory"); |
- |
- u[i + 1][0] = cosf(randTemp); |
- u[i + 1][1] = sinf(randTemp); |
- u[i + 2][0] = cosf(randTemp2); |
- u[i + 2][1] = sinf(randTemp2); |
- u[i + 3][0] = cosf(randTemp3); |
- u[i + 3][1] = sinf(randTemp3); |
- u[i + 4][0] = cosf(randTemp4); |
- u[i + 4][1] = sinf(randTemp4); |
- } |
- |
- // Reject LF noise |
- float* u_ptr = &u[1][0]; |
- float noise2, noise3, noise4; |
- float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; |
- |
- u[0][0] = 0; |
- u[0][1] = 0; |
- for (i = 1; i < PART_LEN1; i += 4) { |
- __asm __volatile( |
- ".set push \n\t" |
- ".set noreorder \n\t" |
- "lwc1 %[noise], 4(%[noisePow]) \n\t" |
- "lwc1 %[noise2], 8(%[noisePow]) \n\t" |
- "lwc1 %[noise3], 12(%[noisePow]) \n\t" |
- "lwc1 %[noise4], 16(%[noisePow]) \n\t" |
- "sqrt.s %[noise], %[noise] \n\t" |
- "sqrt.s %[noise2], %[noise2] \n\t" |
- "sqrt.s %[noise3], %[noise3] \n\t" |
- "sqrt.s %[noise4], %[noise4] \n\t" |
- "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" |
- "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" |
- "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" |
- "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" |
- "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" |
- "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" |
- "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" |
- "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" |
- "addiu %[noisePow], %[noisePow], 16 \n\t" |
- "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" |
- "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" |
- "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" |
- "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" |
- "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t" |
- "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" |
- "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" |
- "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" |
- "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" |
- "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" |
- "neg.s %[tmp2f] \n\t" |
- "neg.s %[tmp4f] \n\t" |
- "neg.s %[tmp6f] \n\t" |
- "neg.s %[tmp8f] \n\t" |
- "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" |
- "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" |
- "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" |
- "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" |
- "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" |
- "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" |
- "addiu %[u_ptr], %[u_ptr], 32 \n\t" |
- ".set pop \n\t" |
- : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), |
- [noise] "=&f" (noise), [noise2] "=&f" (noise2), |
- [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), |
- [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), |
- [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), |
- [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), |
- [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) |
- : |
- : "memory"); |
- } |
- u[PART_LEN][1] = 0; |
- noisePow -= PART_LEN; |
- |
- u_ptr = &u[0][0]; |
- float* u_ptr_end = &u[PART_LEN][0]; |
- float* efw_ptr_0 = &efw[0][0]; |
- float* efw_ptr_1 = &efw[1][0]; |
- float tmp9f, tmp10f; |
- const float tmp1c = 1.0; |
- |
- __asm __volatile( |
- ".set push \n\t" |
- ".set noreorder \n\t" |
- "1: \n\t" |
- "lwc1 %[tmp1f], 0(%[lambda]) \n\t" |
- "lwc1 %[tmp6f], 4(%[lambda]) \n\t" |
- "addiu %[lambda], %[lambda], 8 \n\t" |
- "c.lt.s %[tmp1f], %[tmp1c] \n\t" |
- "bc1f 4f \n\t" |
- " nop \n\t" |
- "c.lt.s %[tmp6f], %[tmp1c] \n\t" |
- "bc1f 3f \n\t" |
- " nop \n\t" |
- "2: \n\t" |
- "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" |
- "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" |
- "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" |
- "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" |
- "sqrt.s %[tmp1f], %[tmp1f] \n\t" |
- "sqrt.s %[tmp6f], %[tmp6f] \n\t" |
- "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" |
- "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" |
- "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" |
- "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" |
- "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" |
- "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" |
- "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" |
- "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" |
-#if !defined(MIPS32_R2_LE) |
- "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" |
- "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" |
- "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" |
- "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" |
- "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" |
- "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" |
- "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" |
- "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" |
-#else // #if !defined(MIPS32_R2_LE) |
- "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" |
- "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" |
- "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" |
- "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" |
-#endif // #if !defined(MIPS32_R2_LE) |
- "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" |
- "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" |
- "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" |
- "b 5f \n\t" |
- " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" |
- "3: \n\t" |
- "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" |
- "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" |
- "sqrt.s %[tmp1f], %[tmp1f] \n\t" |
- "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" |
- "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" |
- "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" |
- "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" |
-#if !defined(MIPS32_R2_LE) |
- "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" |
- "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" |
- "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" |
- "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" |
-#else // #if !defined(MIPS32_R2_LE) |
- "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" |
- "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" |
-#endif // #if !defined(MIPS32_R2_LE) |
- "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" |
- "b 5f \n\t" |
- " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" |
- "4: \n\t" |
- "c.lt.s %[tmp6f], %[tmp1c] \n\t" |
- "bc1f 5f \n\t" |
- " nop \n\t" |
- "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" |
- "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" |
- "sqrt.s %[tmp6f], %[tmp6f] \n\t" |
- "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" |
- "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" |
- "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" |
- "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" |
-#if !defined(MIPS32_R2_LE) |
- "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" |
- "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" |
- "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" |
- "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" |
-#else // #if !defined(MIPS32_R2_LE) |
- "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" |
- "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" |
-#endif // #if !defined(MIPS32_R2_LE) |
- "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" |
- "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" |
- "5: \n\t" |
- "addiu %[u_ptr], %[u_ptr], 16 \n\t" |
- "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" |
- "bne %[u_ptr], %[u_ptr_end], 1b \n\t" |
- " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" |
- ".set pop \n\t" |
- : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), |
- [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), |
- [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), |
- [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), |
- [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), |
- [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) |
- : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) |
- : "memory"); |
- |
- lambda -= PART_LEN; |
- tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); |
- // tmp = 1 - lambda[i]; |
- efw[0][PART_LEN] += tmp * u[PART_LEN][0]; |
- efw[1][PART_LEN] += tmp * u[PART_LEN][1]; |
- |
- // For H band comfort noise |
- // TODO(peah): don't compute noise and "tmp" twice. Use the previous results. |
- noiseAvg = 0.0; |
- tmpAvg = 0.0; |
- num = 0; |
- if (aec->num_bands > 1) { |
- for (i = 0; i < PART_LEN; i++) { |
- rand[i] = (static_cast<float>(randW16[i])) / 32768; |
- } |
- |
- // average noise scale |
- // average over second half of freq spectrum (i.e., 4->8khz) |
- // TODO(peah): we shouldn't need num. We know how many elements we're |
- // summing. |
- for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { |
- num++; |
- noiseAvg += sqrtf(noisePow[i]); |
- } |
- noiseAvg /= static_cast<float>(num); |
- |
- // average nlp scale |
- // average over second half of freq spectrum (i.e., 4->8khz) |
- // TODO(peah): we shouldn't need num. We know how many elements we're |
- // summing. |
- num = 0; |
- for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { |
- num++; |
- tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); |
- } |
- tmpAvg /= static_cast<float>(num); |
- |
- // Use average noise for H band |
- // TODO(peah): we should probably have a new random vector here. |
- // Reject LF noise |
- u[0][0] = 0; |
- u[0][1] = 0; |
- for (i = 1; i < PART_LEN1; i++) { |
- tmp = pi2 * rand[i - 1]; |
- |
- // Use average noise for H band |
- u[i][0] = noiseAvg * static_cast<float>(cos(tmp)); |
- u[i][1] = -noiseAvg * static_cast<float>(sin(tmp)); |
- } |
- u[PART_LEN][1] = 0; |
- |
- for (i = 0; i < PART_LEN1; i++) { |
- // Use average NLP weight for H band |
- comfortNoiseHband[0][i] = tmpAvg * u[i][0]; |
- comfortNoiseHband[1][i] = tmpAvg * u[i][1]; |
- } |
- } else { |
- memset(comfortNoiseHband, 0, |
- 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); |
- } |
-} |
- |
void WebRtcAec_FilterFar_mips( |
int num_partitions, |
int x_fft_buf_block_pos, |
@@ -773,7 +475,6 @@ void WebRtcAec_InitAec_mips(void) { |
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; |
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; |
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; |
- WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; |
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; |
} |
} // namespace webrtc |