webrtc/modules/audio_processing/aec/aec_core_sse2.c - Issue 1456123003: Ducking fix #3: Removed the state as an input to the FilterAdaptation function

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1456123003: Ducking fix #3: Removed the state as an input to the FilterAdaptation function (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@Aec_Code_Cleanup2_CL

Patch Set: Various refactoring, including simplification of the EchoSubtraction function. Created 5 years, 1 month ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
75 xfBuf[1][xPos + j],	75 xfBuf[1][xPos + j],

76 wfBuf[0][pos + j],	76 wfBuf[0][pos + j],

77 wfBuf[1][pos + j]);	77 wfBuf[1][pos + j]);

78 }	78 }

79 }	79 }

80 }	80 }

81	81

82 static void ScaleErrorSignalSSE2(int extended_filter_enabled,	82 static void ScaleErrorSignalSSE2(int extended_filter_enabled,

83 float normal_mu,	83 float normal_mu,

84 float normal_error_threshold,	84 float normal_error_threshold,

85 float *xPow,	85 float xPow[PART_LEN1],

86 float ef[2][PART_LEN1]) {	86 float ef[2][PART_LEN1]) {

87 const __m128 k1e_10f = _mm_set1_ps(1e-10f);	87 const __m128 k1e_10f = _mm_set1_ps(1e-10f);

88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)	88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)

89 : _mm_set1_ps(normal_mu);	89 : _mm_set1_ps(normal_mu);

90 const __m128 kThresh = extended_filter_enabled	90 const __m128 kThresh = extended_filter_enabled

91 ? _mm_set1_ps(kExtendedErrorThreshold)	91 ? _mm_set1_ps(kExtendedErrorThreshold)

92 : _mm_set1_ps(normal_error_threshold);	92 : _mm_set1_ps(normal_error_threshold);

93	93

94 int i;	94 int i;

95 // vectorized code (four at once)	95 // vectorized code (four at once)

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
141 ef[1][i] *= abs_ef;	141 ef[1][i] *= abs_ef;

142 }	142 }

143	143

144 // Stepsize factor	144 // Stepsize factor

145 ef[0][i] *= mu;	145 ef[0][i] *= mu;

146 ef[1][i] *= mu;	146 ef[1][i] *= mu;

147 }	147 }

148 }	148 }

149 }	149 }

150	150

151 static void FilterAdaptationSSE2(AecCore* aec,	151 static void FilterAdaptationSSE2(

152 float* fft,	152 int num_partitions,

153 float ef[2][PART_LEN1]) {	153 int xfBufBlockPos,

	154 float xfBuf[2][kExtendedNumPartitions * PART_LEN1],

	155 float ef[2][PART_LEN1],

	156 float wfBuf[2][kExtendedNumPartitions * PART_LEN1]) {

	157 float fft[PART_LEN2];

154 int i, j;	158 int i, j;

155 const int num_partitions = aec->num_partitions;	159 const int num_partitions_local = num_partitions;
	Review comment — hlundin-webrtc (2015/11/20 11:55:20): Is the local one needed? Reply — peah-webrtc (2015/11/24 13:03:01): Done.
156 for (i = 0; i < num_partitions; i++) {	160 for (i = 0; i < num_partitions_local; i++) {

157 int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);	161 int xPos = (i + xfBufBlockPos) * (PART_LEN1);

158 int pos = i * PART_LEN1;	162 int pos = i * PART_LEN1;

159 // Check for wrap	163 // Check for wrap

160 if (i + aec->xfBufBlockPos >= num_partitions) {	164 if (i + xfBufBlockPos >= num_partitions_local) {

161 xPos -= num_partitions * PART_LEN1;	165 xPos -= num_partitions_local * PART_LEN1;

162 }	166 }

163	167

164 // Process the whole array...	168 // Process the whole array...

165 for (j = 0; j < PART_LEN; j += 4) {	169 for (j = 0; j < PART_LEN; j += 4) {

166 // Load xfBuf and ef.	170 // Load xfBuf and ef.

167 const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);	171 const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]);

168 const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);	172 const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]);

169 const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);	173 const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);

170 const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);	174 const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);

171 // Calculate the product of conjugate(xfBuf) by ef.	175 // Calculate the product of conjugate(xfBuf) by ef.

172 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm	176 // re(conjugate(a) * b) = aRe * bRe + aIm * bIm

173 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe	177 // im(conjugate(a) * b)= aRe * bIm - aIm * bRe

174 const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);	178 const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);

175 const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);	179 const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);

176 const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);	180 const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);

177 const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);	181 const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);

178 const __m128 e = _mm_add_ps(a, b);	182 const __m128 e = _mm_add_ps(a, b);

179 const __m128 f = _mm_sub_ps(c, d);	183 const __m128 f = _mm_sub_ps(c, d);

180 // Interleave real and imaginary parts.	184 // Interleave real and imaginary parts.

181 const __m128 g = _mm_unpacklo_ps(e, f);	185 const __m128 g = _mm_unpacklo_ps(e, f);

182 const __m128 h = _mm_unpackhi_ps(e, f);	186 const __m128 h = _mm_unpackhi_ps(e, f);

183 // Store	187 // Store

184 _mm_storeu_ps(&fft[2 * j + 0], g);	188 _mm_storeu_ps(&fft[2 * j + 0], g);

185 _mm_storeu_ps(&fft[2 * j + 4], h);	189 _mm_storeu_ps(&fft[2 * j + 4], h);

186 }	190 }

187 // ... and fixup the first imaginary entry.	191 // ... and fixup the first imaginary entry.

188 fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],	192 fft[1] = MulRe(xfBuf[0][xPos + PART_LEN],

189 -aec->xfBuf[1][xPos + PART_LEN],	193 -xfBuf[1][xPos + PART_LEN],

190 ef[0][PART_LEN],	194 ef[0][PART_LEN],

191 ef[1][PART_LEN]);	195 ef[1][PART_LEN]);

192	196

193 aec_rdft_inverse_128(fft);	197 aec_rdft_inverse_128(fft);

194 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);	198 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

195	199

196 // fft scaling	200 // fft scaling

197 {	201 {

198 float scale = 2.0f / PART_LEN2;	202 float scale = 2.0f / PART_LEN2;

199 const __m128 scale_ps = _mm_load_ps1(&scale);	203 const __m128 scale_ps = _mm_load_ps1(&scale);

200 for (j = 0; j < PART_LEN; j += 4) {	204 for (j = 0; j < PART_LEN; j += 4) {

201 const __m128 fft_ps = _mm_loadu_ps(&fft[j]);	205 const __m128 fft_ps = _mm_loadu_ps(&fft[j]);

202 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);	206 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);

203 _mm_storeu_ps(&fft[j], fft_scale);	207 _mm_storeu_ps(&fft[j], fft_scale);

204 }	208 }

205 }	209 }

206 aec_rdft_forward_128(fft);	210 aec_rdft_forward_128(fft);

207	211

208 {	212 {

209 float wt1 = aec->wfBuf[1][pos];	213 float wt1 = wfBuf[1][pos];

210 aec->wfBuf[0][pos + PART_LEN] += fft[1];	214 wfBuf[0][pos + PART_LEN] += fft[1];

211 for (j = 0; j < PART_LEN; j += 4) {	215 for (j = 0; j < PART_LEN; j += 4) {

212 __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);	216 __m128 wtBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]);

213 __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);	217 __m128 wtBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]);

214 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);	218 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);

215 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);	219 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);

216 const __m128 fft_re =	220 const __m128 fft_re =

217 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));	221 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));

218 const __m128 fft_im =	222 const __m128 fft_im =

219 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));	223 _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));

220 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);	224 wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);

221 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);	225 wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);

222 _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);	226 _mm_storeu_ps(&wfBuf[0][pos + j], wtBuf_re);

223 _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);	227 _mm_storeu_ps(&wfBuf[1][pos + j], wtBuf_im);

224 }	228 }

225 aec->wfBuf[1][pos] = wt1;	229 wfBuf[1][pos] = wt1;

226 }	230 }

227 }	231 }

228 }	232 }

229	233

230 static __m128 mm_pow_ps(__m128 a, __m128 b) {	234 static __m128 mm_pow_ps(__m128 a, __m128 b) {

231 // a^b = exp2(b * log2(a))	235 // a^b = exp2(b * log2(a))

232 // exp2(x) and log2(x) are calculated using polynomial approximations.	236 // exp2(x) and log2(x) are calculated using polynomial approximations.

233 __m128 log2_a, b_log2_a, a_exp_b;	237 __m128 log2_a, b_log2_a, a_exp_b;

234	238

235 // Calculate log2(x), x = a.	239 // Calculate log2(x), x = a.

(...skipping 495 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
731 }	735 }

732 }	736 }

733	737

734 void WebRtcAec_InitAec_SSE2(void) {	738 void WebRtcAec_InitAec_SSE2(void) {

735 WebRtcAec_FilterFar = FilterFarSSE2;	739 WebRtcAec_FilterFar = FilterFarSSE2;

736 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;	740 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;

737 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;	741 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;

738 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;	742 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;

739 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;	743 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;

740 }	744 }

OLD	NEW

« webrtc/modules/audio_processing/aec/aec_core_mips.c ('K') | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »