Chromium Code Reviews | Index: webrtc/modules/audio_processing/aec/aec_core_sse2.c |
| diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
| index 682b61091653891a1851a671b41f9c033fc507c5..041b8d9ab299700ae86b10cc67bfe25a87cbb433 100644 |
| --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
| +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c |
| @@ -82,7 +82,7 @@ static void FilterFarSSE2(int num_partitions, |
| static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
| float normal_mu, |
| float normal_error_threshold, |
| - float *xPow, |
| + float xPow[PART_LEN1], |
| float ef[2][PART_LEN1]) { |
| const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
| const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) |
| @@ -148,24 +148,28 @@ static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
| } |
| } |
| -static void FilterAdaptationSSE2(AecCore* aec, |
| - float* fft, |
| - float ef[2][PART_LEN1]) { |
| +static void FilterAdaptationSSE2( |
| + int num_partitions, |
| + int xfBufBlockPos, |
| + float xfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| + float ef[2][PART_LEN1], |
| + float wfBuf[2][kExtendedNumPartitions * PART_LEN1]) { |
| + float fft[PART_LEN2]; |
| int i, j; |
| - const int num_partitions = aec->num_partitions; |
| - for (i = 0; i < num_partitions; i++) { |
| - int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); |
| + const int num_partitions_local = num_partitions; |
|
hlundin-webrtc
2015/11/20 11:55:20
Is the local copy (num_partitions_local) needed? It only duplicates the num_partitions parameter.
peah-webrtc
2015/11/24 13:03:01
Done.
|
| + for (i = 0; i < num_partitions_local; i++) { |
| + int xPos = (i + xfBufBlockPos) * (PART_LEN1); |
| int pos = i * PART_LEN1; |
| // Check for wrap |
| - if (i + aec->xfBufBlockPos >= num_partitions) { |
| - xPos -= num_partitions * PART_LEN1; |
| + if (i + xfBufBlockPos >= num_partitions_local) { |
| + xPos -= num_partitions_local * PART_LEN1; |
| } |
| // Process the whole array... |
| for (j = 0; j < PART_LEN; j += 4) { |
| // Load xfBuf and ef. |
| - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); |
| - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); |
| + const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]); |
| + const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]); |
| const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); |
| const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); |
| // Calculate the product of conjugate(xfBuf) by ef. |
| @@ -185,8 +189,8 @@ static void FilterAdaptationSSE2(AecCore* aec, |
| _mm_storeu_ps(&fft[2 * j + 4], h); |
| } |
| // ... and fixup the first imaginary entry. |
| - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], |
| - -aec->xfBuf[1][xPos + PART_LEN], |
| + fft[1] = MulRe(xfBuf[0][xPos + PART_LEN], |
| + -xfBuf[1][xPos + PART_LEN], |
| ef[0][PART_LEN], |
| ef[1][PART_LEN]); |
| @@ -206,11 +210,11 @@ static void FilterAdaptationSSE2(AecCore* aec, |
| aec_rdft_forward_128(fft); |
| { |
| - float wt1 = aec->wfBuf[1][pos]; |
| - aec->wfBuf[0][pos + PART_LEN] += fft[1]; |
| + float wt1 = wfBuf[1][pos]; |
| + wfBuf[0][pos + PART_LEN] += fft[1]; |
| for (j = 0; j < PART_LEN; j += 4) { |
| - __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); |
| - __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); |
| + __m128 wtBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]); |
| + __m128 wtBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]); |
| const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); |
| const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); |
| const __m128 fft_re = |
| @@ -219,10 +223,10 @@ static void FilterAdaptationSSE2(AecCore* aec, |
| _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); |
| wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); |
| wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); |
| - _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); |
| - _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); |
| + _mm_storeu_ps(&wfBuf[0][pos + j], wtBuf_re); |
| + _mm_storeu_ps(&wfBuf[1][pos + j], wtBuf_im); |
| } |
| - aec->wfBuf[1][pos] = wt1; |
| + wfBuf[1][pos] = wt1; |
| } |
| } |
| } |