Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(689)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1456123003: Ducking fix #3: Removed the state as an input to the FilterAdaptation function (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@Aec_Code_Cleanup2_CL
Patch Set: Various refactoring, among other things simplification of the EchoSubtraction function Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 xfBuf[1][xPos + j], 75 xfBuf[1][xPos + j],
76 wfBuf[0][pos + j], 76 wfBuf[0][pos + j],
77 wfBuf[1][pos + j]); 77 wfBuf[1][pos + j]);
78 } 78 }
79 } 79 }
80 } 80 }
81 81
82 static void ScaleErrorSignalSSE2(int extended_filter_enabled, 82 static void ScaleErrorSignalSSE2(int extended_filter_enabled,
83 float normal_mu, 83 float normal_mu,
84 float normal_error_threshold, 84 float normal_error_threshold,
85 float *xPow, 85 float xPow[PART_LEN1],
86 float ef[2][PART_LEN1]) { 86 float ef[2][PART_LEN1]) {
87 const __m128 k1e_10f = _mm_set1_ps(1e-10f); 87 const __m128 k1e_10f = _mm_set1_ps(1e-10f);
88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) 88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
89 : _mm_set1_ps(normal_mu); 89 : _mm_set1_ps(normal_mu);
90 const __m128 kThresh = extended_filter_enabled 90 const __m128 kThresh = extended_filter_enabled
91 ? _mm_set1_ps(kExtendedErrorThreshold) 91 ? _mm_set1_ps(kExtendedErrorThreshold)
92 : _mm_set1_ps(normal_error_threshold); 92 : _mm_set1_ps(normal_error_threshold);
93 93
94 int i; 94 int i;
95 // vectorized code (four at once) 95 // vectorized code (four at once)
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
141 ef[1][i] *= abs_ef; 141 ef[1][i] *= abs_ef;
142 } 142 }
143 143
144 // Stepsize factor 144 // Stepsize factor
145 ef[0][i] *= mu; 145 ef[0][i] *= mu;
146 ef[1][i] *= mu; 146 ef[1][i] *= mu;
147 } 147 }
148 } 148 }
149 } 149 }
150 150
151 static void FilterAdaptationSSE2(AecCore* aec, 151 static void FilterAdaptationSSE2(
152 float* fft, 152 int num_partitions,
153 float ef[2][PART_LEN1]) { 153 int xfBufBlockPos,
154 float xfBuf[2][kExtendedNumPartitions * PART_LEN1],
155 float ef[2][PART_LEN1],
156 float wfBuf[2][kExtendedNumPartitions * PART_LEN1]) {
157 float fft[PART_LEN2];
154 int i, j; 158 int i, j;
155 const int num_partitions = aec->num_partitions; 159 const int num_partitions_local = num_partitions;
hlundin-webrtc (2015/11/20 11:55:20): Is the local one needed?
peah-webrtc (2015/11/24 13:03:01): Done.
156 for (i = 0; i < num_partitions; i++) { 160 for (i = 0; i < num_partitions_local; i++) {
157 int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); 161 int xPos = (i + xfBufBlockPos) * (PART_LEN1);
158 int pos = i * PART_LEN1; 162 int pos = i * PART_LEN1;
159 // Check for wrap 163 // Check for wrap
160 if (i + aec->xfBufBlockPos >= num_partitions) { 164 if (i + xfBufBlockPos >= num_partitions_local) {
161 xPos -= num_partitions * PART_LEN1; 165 xPos -= num_partitions_local * PART_LEN1;
162 } 166 }
163 167
164 // Process the whole array... 168 // Process the whole array...
165 for (j = 0; j < PART_LEN; j += 4) { 169 for (j = 0; j < PART_LEN; j += 4) {
166 // Load xfBuf and ef. 170 // Load xfBuf and ef.
167 const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); 171 const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]);
168 const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); 172 const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]);
169 const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); 173 const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
170 const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); 174 const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
171 // Calculate the product of conjugate(xfBuf) by ef. 175 // Calculate the product of conjugate(xfBuf) by ef.
172 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm 176 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm
173 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe 177 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe
174 const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); 178 const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
175 const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); 179 const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
176 const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); 180 const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
177 const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); 181 const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
178 const __m128 e = _mm_add_ps(a, b); 182 const __m128 e = _mm_add_ps(a, b);
179 const __m128 f = _mm_sub_ps(c, d); 183 const __m128 f = _mm_sub_ps(c, d);
180 // Interleave real and imaginary parts. 184 // Interleave real and imaginary parts.
181 const __m128 g = _mm_unpacklo_ps(e, f); 185 const __m128 g = _mm_unpacklo_ps(e, f);
182 const __m128 h = _mm_unpackhi_ps(e, f); 186 const __m128 h = _mm_unpackhi_ps(e, f);
183 // Store 187 // Store
184 _mm_storeu_ps(&fft[2 * j + 0], g); 188 _mm_storeu_ps(&fft[2 * j + 0], g);
185 _mm_storeu_ps(&fft[2 * j + 4], h); 189 _mm_storeu_ps(&fft[2 * j + 4], h);
186 } 190 }
187 // ... and fixup the first imaginary entry. 191 // ... and fixup the first imaginary entry.
188 fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], 192 fft[1] = MulRe(xfBuf[0][xPos + PART_LEN],
189 -aec->xfBuf[1][xPos + PART_LEN], 193 -xfBuf[1][xPos + PART_LEN],
190 ef[0][PART_LEN], 194 ef[0][PART_LEN],
191 ef[1][PART_LEN]); 195 ef[1][PART_LEN]);
192 196
193 aec_rdft_inverse_128(fft); 197 aec_rdft_inverse_128(fft);
194 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 198 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
195 199
196 // fft scaling 200 // fft scaling
197 { 201 {
198 float scale = 2.0f / PART_LEN2; 202 float scale = 2.0f / PART_LEN2;
199 const __m128 scale_ps = _mm_load_ps1(&scale); 203 const __m128 scale_ps = _mm_load_ps1(&scale);
200 for (j = 0; j < PART_LEN; j += 4) { 204 for (j = 0; j < PART_LEN; j += 4) {
201 const __m128 fft_ps = _mm_loadu_ps(&fft[j]); 205 const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
202 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); 206 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
203 _mm_storeu_ps(&fft[j], fft_scale); 207 _mm_storeu_ps(&fft[j], fft_scale);
204 } 208 }
205 } 209 }
206 aec_rdft_forward_128(fft); 210 aec_rdft_forward_128(fft);
207 211
208 { 212 {
209 float wt1 = aec->wfBuf[1][pos]; 213 float wt1 = wfBuf[1][pos];
210 aec->wfBuf[0][pos + PART_LEN] += fft[1]; 214 wfBuf[0][pos + PART_LEN] += fft[1];
211 for (j = 0; j < PART_LEN; j += 4) { 215 for (j = 0; j < PART_LEN; j += 4) {
212 __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); 216 __m128 wtBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]);
213 __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); 217 __m128 wtBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]);
214 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); 218 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
215 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); 219 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
216 const __m128 fft_re = 220 const __m128 fft_re =
217 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); 221 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
218 const __m128 fft_im = 222 const __m128 fft_im =
219 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); 223 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
220 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); 224 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
221 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); 225 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
222 _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); 226 _mm_storeu_ps(&wfBuf[0][pos + j], wtBuf_re);
223 _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); 227 _mm_storeu_ps(&wfBuf[1][pos + j], wtBuf_im);
224 } 228 }
225 aec->wfBuf[1][pos] = wt1; 229 wfBuf[1][pos] = wt1;
226 } 230 }
227 } 231 }
228 } 232 }
229 233
230 static __m128 mm_pow_ps(__m128 a, __m128 b) { 234 static __m128 mm_pow_ps(__m128 a, __m128 b) {
231 // a^b = exp2(b * log2(a)) 235 // a^b = exp2(b * log2(a))
232 // exp2(x) and log2(x) are calculated using polynomial approximations. 236 // exp2(x) and log2(x) are calculated using polynomial approximations.
233 __m128 log2_a, b_log2_a, a_exp_b; 237 __m128 log2_a, b_log2_a, a_exp_b;
234 238
235 // Calculate log2(x), x = a. 239 // Calculate log2(x), x = a.
(...skipping 495 matching lines...) Expand 10 before | Expand all | Expand 10 after
731 } 735 }
732 } 736 }
733 737
734 void WebRtcAec_InitAec_SSE2(void) { 738 void WebRtcAec_InitAec_SSE2(void) {
735 WebRtcAec_FilterFar = FilterFarSSE2; 739 WebRtcAec_FilterFar = FilterFarSSE2;
736 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 740 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
737 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 741 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
738 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 742 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
739 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 743 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
740 } 744 }
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698