Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1043)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1456123003: Ducking fix #3: Removed the state as an input to the FilterAdaptation function (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@Aec_Code_Cleanup2_CL
Patch Set: Removed gcc build-breaking method argument specifiers Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
23 23
24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
25 return aRe * bRe - aIm * bIm; 25 return aRe * bRe - aIm * bIm;
26 } 26 }
27 27
28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
29 return aRe * bIm + aIm * bRe; 29 return aRe * bIm + aIm * bRe;
30 } 30 }
31 31
32 static void FilterFarSSE2(int num_partitions, 32 static void FilterFarSSE2(
33 int xfBufBlockPos, 33 int num_partitions,
34 float xfBuf[2][kExtendedNumPartitions * PART_LEN1], 34 int x_fft_buf_block_pos,
35 float wfBuf[2][kExtendedNumPartitions * PART_LEN1], 35 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
36 float yf[2][PART_LEN1]) { 36 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
37 float y_fft[2][PART_LEN1]) {
37 38
38 int i; 39 int i;
39 for (i = 0; i < num_partitions; i++) { 40 for (i = 0; i < num_partitions; i++) {
40 int j; 41 int j;
41 int xPos = (i + xfBufBlockPos) * PART_LEN1; 42 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
42 int pos = i * PART_LEN1; 43 int pos = i * PART_LEN1;
43 // Check for wrap 44 // Check for wrap
44 if (i + xfBufBlockPos >= num_partitions) { 45 if (i + x_fft_buf_block_pos >= num_partitions) {
45 xPos -= num_partitions * (PART_LEN1); 46 xPos -= num_partitions * (PART_LEN1);
46 } 47 }
47 48
48 // vectorized code (four at once) 49 // vectorized code (four at once)
49 for (j = 0; j + 3 < PART_LEN1; j += 4) { 50 for (j = 0; j + 3 < PART_LEN1; j += 4) {
50 const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]); 51 const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
51 const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]); 52 const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
52 const __m128 wfBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]); 53 const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
53 const __m128 wfBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]); 54 const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
54 const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); 55 const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]);
55 const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); 56 const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]);
56 const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); 57 const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re);
57 const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); 58 const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im);
58 const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); 59 const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im);
59 const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); 60 const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re);
60 const __m128 e = _mm_sub_ps(a, b); 61 const __m128 e = _mm_sub_ps(a, b);
61 const __m128 f = _mm_add_ps(c, d); 62 const __m128 f = _mm_add_ps(c, d);
62 const __m128 g = _mm_add_ps(yf_re, e); 63 const __m128 g = _mm_add_ps(y_fft_re, e);
63 const __m128 h = _mm_add_ps(yf_im, f); 64 const __m128 h = _mm_add_ps(y_fft_im, f);
64 _mm_storeu_ps(&yf[0][j], g); 65 _mm_storeu_ps(&y_fft[0][j], g);
65 _mm_storeu_ps(&yf[1][j], h); 66 _mm_storeu_ps(&y_fft[1][j], h);
66 } 67 }
67 // scalar code for the remaining items. 68 // scalar code for the remaining items.
68 for (; j < PART_LEN1; j++) { 69 for (; j < PART_LEN1; j++) {
69 yf[0][j] += MulRe(xfBuf[0][xPos + j], 70 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
70 xfBuf[1][xPos + j], 71 x_fft_buf[1][xPos + j],
71 wfBuf[0][pos + j], 72 h_fft_buf[0][pos + j],
72 wfBuf[1][pos + j]); 73 h_fft_buf[1][pos + j]);
73 yf[1][j] += MulIm(xfBuf[0][xPos + j], 74 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
74 xfBuf[1][xPos + j], 75 x_fft_buf[1][xPos + j],
75 wfBuf[0][pos + j], 76 h_fft_buf[0][pos + j],
76 wfBuf[1][pos + j]); 77 h_fft_buf[1][pos + j]);
77 } 78 }
78 } 79 }
79 } 80 }
80 81
81 static void ScaleErrorSignalSSE2(int extended_filter_enabled, 82 static void ScaleErrorSignalSSE2(int extended_filter_enabled,
82 float normal_mu, 83 float normal_mu,
83 float normal_error_threshold, 84 float normal_error_threshold,
84 float *x_pow, 85 float x_pow[PART_LEN1],
85 float ef[2][PART_LEN1]) { 86 float ef[2][PART_LEN1]) {
86 const __m128 k1e_10f = _mm_set1_ps(1e-10f); 87 const __m128 k1e_10f = _mm_set1_ps(1e-10f);
87 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) 88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
88 : _mm_set1_ps(normal_mu); 89 : _mm_set1_ps(normal_mu);
89 const __m128 kThresh = extended_filter_enabled 90 const __m128 kThresh = extended_filter_enabled
90 ? _mm_set1_ps(kExtendedErrorThreshold) 91 ? _mm_set1_ps(kExtendedErrorThreshold)
91 : _mm_set1_ps(normal_error_threshold); 92 : _mm_set1_ps(normal_error_threshold);
92 93
93 int i; 94 int i;
94 // vectorized code (four at once) 95 // vectorized code (four at once)
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
140 ef[1][i] *= abs_ef; 141 ef[1][i] *= abs_ef;
141 } 142 }
142 143
143 // Stepsize factor 144 // Stepsize factor
144 ef[0][i] *= mu; 145 ef[0][i] *= mu;
145 ef[1][i] *= mu; 146 ef[1][i] *= mu;
146 } 147 }
147 } 148 }
148 } 149 }
149 150
150 static void FilterAdaptationSSE2(AecCore* aec, 151 static void FilterAdaptationSSE2(
151 float* fft, 152 int num_partitions,
152 float ef[2][PART_LEN1]) { 153 int x_fft_buf_block_pos,
154 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
155 float e_fft[2][PART_LEN1],
156 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
157 float fft[PART_LEN2];
153 int i, j; 158 int i, j;
154 const int num_partitions = aec->num_partitions;
155 for (i = 0; i < num_partitions; i++) { 159 for (i = 0; i < num_partitions; i++) {
156 int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); 160 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
157 int pos = i * PART_LEN1; 161 int pos = i * PART_LEN1;
158 // Check for wrap 162 // Check for wrap
159 if (i + aec->xfBufBlockPos >= num_partitions) { 163 if (i + x_fft_buf_block_pos >= num_partitions) {
160 xPos -= num_partitions * PART_LEN1; 164 xPos -= num_partitions * PART_LEN1;
161 } 165 }
162 166
163 // Process the whole array... 167 // Process the whole array...
164 for (j = 0; j < PART_LEN; j += 4) { 168 for (j = 0; j < PART_LEN; j += 4) {
165 // Load xfBuf and ef. 169 // Load x_fft_buf and e_fft.
166 const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); 170 const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
167 const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); 171 const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
168 const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); 172 const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]);
169 const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); 173 const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]);
170 // Calculate the product of conjugate(xfBuf) by ef. 174 // Calculate the product of conjugate(x_fft_buf) by e_fft.
171 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm 175 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm
172 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe 176 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe
173 const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); 177 const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re);
174 const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); 178 const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im);
175 const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); 179 const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im);
176 const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); 180 const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re);
177 const __m128 e = _mm_add_ps(a, b); 181 const __m128 e = _mm_add_ps(a, b);
178 const __m128 f = _mm_sub_ps(c, d); 182 const __m128 f = _mm_sub_ps(c, d);
179 // Interleave real and imaginary parts. 183 // Interleave real and imaginary parts.
180 const __m128 g = _mm_unpacklo_ps(e, f); 184 const __m128 g = _mm_unpacklo_ps(e, f);
181 const __m128 h = _mm_unpackhi_ps(e, f); 185 const __m128 h = _mm_unpackhi_ps(e, f);
182 // Store 186 // Store
183 _mm_storeu_ps(&fft[2 * j + 0], g); 187 _mm_storeu_ps(&fft[2 * j + 0], g);
184 _mm_storeu_ps(&fft[2 * j + 4], h); 188 _mm_storeu_ps(&fft[2 * j + 4], h);
185 } 189 }
186 // ... and fixup the first imaginary entry. 190 // ... and fixup the first imaginary entry.
187 fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], 191 fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
188 -aec->xfBuf[1][xPos + PART_LEN], 192 -x_fft_buf[1][xPos + PART_LEN],
189 ef[0][PART_LEN], 193 e_fft[0][PART_LEN],
190 ef[1][PART_LEN]); 194 e_fft[1][PART_LEN]);
191 195
192 aec_rdft_inverse_128(fft); 196 aec_rdft_inverse_128(fft);
193 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 197 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
194 198
195 // fft scaling 199 // fft scaling
196 { 200 {
197 float scale = 2.0f / PART_LEN2; 201 float scale = 2.0f / PART_LEN2;
198 const __m128 scale_ps = _mm_load_ps1(&scale); 202 const __m128 scale_ps = _mm_load_ps1(&scale);
199 for (j = 0; j < PART_LEN; j += 4) { 203 for (j = 0; j < PART_LEN; j += 4) {
200 const __m128 fft_ps = _mm_loadu_ps(&fft[j]); 204 const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
201 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); 205 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
202 _mm_storeu_ps(&fft[j], fft_scale); 206 _mm_storeu_ps(&fft[j], fft_scale);
203 } 207 }
204 } 208 }
205 aec_rdft_forward_128(fft); 209 aec_rdft_forward_128(fft);
206 210
207 { 211 {
208 float wt1 = aec->wfBuf[1][pos]; 212 float wt1 = h_fft_buf[1][pos];
209 aec->wfBuf[0][pos + PART_LEN] += fft[1]; 213 h_fft_buf[0][pos + PART_LEN] += fft[1];
210 for (j = 0; j < PART_LEN; j += 4) { 214 for (j = 0; j < PART_LEN; j += 4) {
211 __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); 215 __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
212 __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); 216 __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
213 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); 217 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
214 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); 218 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
215 const __m128 fft_re = 219 const __m128 fft_re =
216 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); 220 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
217 const __m128 fft_im = 221 const __m128 fft_im =
218 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); 222 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
219 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); 223 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
220 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); 224 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
221 _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); 225 _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re);
222 _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); 226 _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im);
223 } 227 }
224 aec->wfBuf[1][pos] = wt1; 228 h_fft_buf[1][pos] = wt1;
225 } 229 }
226 } 230 }
227 } 231 }
228 232
229 static __m128 mm_pow_ps(__m128 a, __m128 b) { 233 static __m128 mm_pow_ps(__m128 a, __m128 b) {
230 // a^b = exp2(b * log2(a)) 234 // a^b = exp2(b * log2(a))
231 // exp2(x) and log2(x) are calculated using polynomial approximations. 235 // exp2(x) and log2(x) are calculated using polynomial approximations.
232 __m128 log2_a, b_log2_a, a_exp_b; 236 __m128 log2_a, b_log2_a, a_exp_b;
233 237
234 // Calculate log2(x), x = a. 238 // Calculate log2(x), x = a.
(...skipping 495 matching lines...) Expand 10 before | Expand all | Expand 10 after
730 } 734 }
731 } 735 }
732 736
733 void WebRtcAec_InitAec_SSE2(void) { 737 void WebRtcAec_InitAec_SSE2(void) {
734 WebRtcAec_FilterFar = FilterFarSSE2; 738 WebRtcAec_FilterFar = FilterFarSSE2;
735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 739 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
736 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 740 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 741 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
738 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 742 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
739 } 743 }
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698