Index: webrtc/modules/audio_processing/aec/aec_core_sse2.c |
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
index 682b61091653891a1851a671b41f9c033fc507c5..041b8d9ab299700ae86b10cc67bfe25a87cbb433 100644 |
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
@@ -82,7 +82,7 @@ static void FilterFarSSE2(int num_partitions, |
static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
float normal_mu, |
float normal_error_threshold, |
- float *xPow, |
+ float xPow[PART_LEN1], |
float ef[2][PART_LEN1]) { |
const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) |
@@ -148,24 +148,28 @@ static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
} |
} |
-static void FilterAdaptationSSE2(AecCore* aec, |
- float* fft, |
- float ef[2][PART_LEN1]) { |
+static void FilterAdaptationSSE2( |
+ int num_partitions, |
+ int xfBufBlockPos, |
+ float xfBuf[2][kExtendedNumPartitions * PART_LEN1], |
+ float ef[2][PART_LEN1], |
+ float wfBuf[2][kExtendedNumPartitions * PART_LEN1]) { |
+ float fft[PART_LEN2]; |
int i, j; |
- const int num_partitions = aec->num_partitions; |
- for (i = 0; i < num_partitions; i++) { |
- int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); |
+ const int num_partitions_local = num_partitions; |
hlundin-webrtc
2015/11/20 11:55:20
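Is the local copy (num_partitions_local) needed? num_partitions is already passed by value, so it can be used directly in the loop.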
peah-webrtc
2015/11/24 13:03:01
Done.
|
+ for (i = 0; i < num_partitions_local; i++) { |
+ int xPos = (i + xfBufBlockPos) * (PART_LEN1); |
int pos = i * PART_LEN1; |
// Check for wrap |
- if (i + aec->xfBufBlockPos >= num_partitions) { |
- xPos -= num_partitions * PART_LEN1; |
+ if (i + xfBufBlockPos >= num_partitions_local) { |
+ xPos -= num_partitions_local * PART_LEN1; |
} |
// Process the whole array... |
for (j = 0; j < PART_LEN; j += 4) { |
// Load xfBuf and ef. |
- const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); |
- const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); |
+ const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]); |
+ const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]); |
const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); |
const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); |
// Calculate the product of conjugate(xfBuf) by ef. |
@@ -185,8 +189,8 @@ static void FilterAdaptationSSE2(AecCore* aec, |
_mm_storeu_ps(&fft[2 * j + 4], h); |
} |
// ... and fixup the first imaginary entry. |
- fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], |
- -aec->xfBuf[1][xPos + PART_LEN], |
+ fft[1] = MulRe(xfBuf[0][xPos + PART_LEN], |
+ -xfBuf[1][xPos + PART_LEN], |
ef[0][PART_LEN], |
ef[1][PART_LEN]); |
@@ -206,11 +210,11 @@ static void FilterAdaptationSSE2(AecCore* aec, |
aec_rdft_forward_128(fft); |
{ |
- float wt1 = aec->wfBuf[1][pos]; |
- aec->wfBuf[0][pos + PART_LEN] += fft[1]; |
+ float wt1 = wfBuf[1][pos]; |
+ wfBuf[0][pos + PART_LEN] += fft[1]; |
for (j = 0; j < PART_LEN; j += 4) { |
- __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); |
- __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); |
+ __m128 wtBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]); |
+ __m128 wtBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]); |
const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); |
const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); |
const __m128 fft_re = |
@@ -219,10 +223,10 @@ static void FilterAdaptationSSE2(AecCore* aec, |
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); |
wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); |
wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); |
- _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); |
- _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); |
+ _mm_storeu_ps(&wfBuf[0][pos + j], wtBuf_re); |
+ _mm_storeu_ps(&wfBuf[1][pos + j], wtBuf_im); |
} |
- aec->wfBuf[1][pos] = wt1; |
+ wfBuf[1][pos] = wt1; |
} |
} |
} |