Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1454983006: Ducking fix #2: Removed the aec state as an input parameter to the FilterFar function. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@Aec_Code_Cleanup_CL
Patch Set: Corrected line alignment (created 5 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
23 23
24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
25 return aRe * bRe - aIm * bIm; 25 return aRe * bRe - aIm * bIm;
26 } 26 }
27 27
28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
29 return aRe * bIm + aIm * bRe; 29 return aRe * bIm + aIm * bRe;
30 } 30 }
31 31
32 static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { 32 static void FilterFarSSE2(int num_partitions,
33 int xfBufBlockPos,
34 float xfBuf[2][kExtendedNumPartitions * PART_LEN1],
35 float wfBuf[2][kExtendedNumPartitions * PART_LEN1],
36 float yf[2][PART_LEN1]) {
37
33 int i; 38 int i;
34 const int num_partitions = aec->num_partitions;
35 for (i = 0; i < num_partitions; i++) { 39 for (i = 0; i < num_partitions; i++) {
36 int j; 40 int j;
37 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; 41 int xPos = (i + xfBufBlockPos) * PART_LEN1;
38 int pos = i * PART_LEN1; 42 int pos = i * PART_LEN1;
39 // Check for wrap 43 // Check for wrap
40 if (i + aec->xfBufBlockPos >= num_partitions) { 44 if (i + xfBufBlockPos >= num_partitions) {
41 xPos -= num_partitions * (PART_LEN1); 45 xPos -= num_partitions * (PART_LEN1);
42 } 46 }
43 47
44 // vectorized code (four at once) 48 // vectorized code (four at once)
45 for (j = 0; j + 3 < PART_LEN1; j += 4) { 49 for (j = 0; j + 3 < PART_LEN1; j += 4) {
46 const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); 50 const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]);
47 const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); 51 const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]);
48 const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); 52 const __m128 wfBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]);
49 const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); 53 const __m128 wfBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]);
50 const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); 54 const __m128 yf_re = _mm_loadu_ps(&yf[0][j]);
51 const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); 55 const __m128 yf_im = _mm_loadu_ps(&yf[1][j]);
52 const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); 56 const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re);
53 const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); 57 const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im);
54 const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); 58 const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im);
55 const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); 59 const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re);
56 const __m128 e = _mm_sub_ps(a, b); 60 const __m128 e = _mm_sub_ps(a, b);
57 const __m128 f = _mm_add_ps(c, d); 61 const __m128 f = _mm_add_ps(c, d);
58 const __m128 g = _mm_add_ps(yf_re, e); 62 const __m128 g = _mm_add_ps(yf_re, e);
59 const __m128 h = _mm_add_ps(yf_im, f); 63 const __m128 h = _mm_add_ps(yf_im, f);
60 _mm_storeu_ps(&yf[0][j], g); 64 _mm_storeu_ps(&yf[0][j], g);
61 _mm_storeu_ps(&yf[1][j], h); 65 _mm_storeu_ps(&yf[1][j], h);
62 } 66 }
63 // scalar code for the remaining items. 67 // scalar code for the remaining items.
64 for (; j < PART_LEN1; j++) { 68 for (; j < PART_LEN1; j++) {
65 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], 69 yf[0][j] += MulRe(xfBuf[0][xPos + j],
66 aec->xfBuf[1][xPos + j], 70 xfBuf[1][xPos + j],
67 aec->wfBuf[0][pos + j], 71 wfBuf[0][pos + j],
68 aec->wfBuf[1][pos + j]); 72 wfBuf[1][pos + j]);
69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], 73 yf[1][j] += MulIm(xfBuf[0][xPos + j],
70 aec->xfBuf[1][xPos + j], 74 xfBuf[1][xPos + j],
71 aec->wfBuf[0][pos + j], 75 wfBuf[0][pos + j],
72 aec->wfBuf[1][pos + j]); 76 wfBuf[1][pos + j]);
73 } 77 }
74 } 78 }
75 } 79 }
76 80
77 static void ScaleErrorSignalSSE2(int extended_filter_enabled, 81 static void ScaleErrorSignalSSE2(int extended_filter_enabled,
78 float normal_mu, 82 float normal_mu,
79 float normal_error_threshold, 83 float normal_error_threshold,
80 float *x_pow, 84 float *x_pow,
81 float ef[2][PART_LEN1]) { 85 float ef[2][PART_LEN1]) {
82 const __m128 k1e_10f = _mm_set1_ps(1e-10f); 86 const __m128 k1e_10f = _mm_set1_ps(1e-10f);
(...skipping 643 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 } 730 }
727 } 731 }
728 732
729 void WebRtcAec_InitAec_SSE2(void) { 733 void WebRtcAec_InitAec_SSE2(void) {
730 WebRtcAec_FilterFar = FilterFarSSE2; 734 WebRtcAec_FilterFar = FilterFarSSE2;
731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
732 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 736 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
734 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 738 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
735 } 739 }
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698