webrtc/modules/audio_processing/aec/aec_core_sse2.cc - Issue 1713923002: Moved the AEC C code to be built using C++

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.cc

Issue 1713923002: Moved the AEC C code to be built using C++ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Format changes to comply with lint Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 /*	11 /*

12 * The core AEC algorithm, SSE2 version of speed-critical functions.	12 * The core AEC algorithm, SSE2 version of speed-critical functions.

13 */	13 */

14	14

15 #include <emmintrin.h>	15 #include <emmintrin.h>

16 #include <math.h>	16 #include <math.h>

17 #include <string.h> // memset	17 #include <string.h> // memset

18	18

	19 extern "C" {

19 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar y.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar y.h"

	21 }

20 #include "webrtc/modules/audio_processing/aec/aec_common.h"	22 #include "webrtc/modules/audio_processing/aec/aec_common.h"

21 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"	23 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"

	24 extern "C" {

22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"	25 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"

	26 }

23	27

24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {	28 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {

25 return aRe * bRe - aIm * bIm;	29 return aRe * bRe - aIm * bIm;

26 }	30 }

27	31

28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {	32 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {

29 return aRe * bIm + aIm * bRe;	33 return aRe * bIm + aIm * bRe;

30 }	34 }

31	35

32 static void FilterFarSSE2(int num_partitions,	36 static void FilterFarSSE2(int num_partitions,

(...skipping 216 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
249 // compensate the fact that the exponent has been shifted in the top/	253 // compensate the fact that the exponent has been shifted in the top/

250 // fractional part and finally getting rid of the implicit leading one	254 // fractional part and finally getting rid of the implicit leading one

251 // from the mantissa by substracting it out.	255 // from the mantissa by substracting it out.

252 static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {	256 static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {

253 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};	257 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};

254 static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {	258 static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {

255 0x43800000, 0x43800000, 0x43800000, 0x43800000};	259 0x43800000, 0x43800000, 0x43800000, 0x43800000};

256 static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {	260 static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {

257 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};	261 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};

258 static const int shift_exponent_into_top_mantissa = 8;	262 static const int shift_exponent_into_top_mantissa = 8;

259 const __m128 two_n = _mm_and_ps(a, ((__m128)float_exponent_mask));	263 const __m128 two_n =

	264 _mm_and_ps(a, (reinterpret_cast<const __m128>(float_exponent_mask)));

260 const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(	265 const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(

261 _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));	266 _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));

262 const __m128 n_0 = _mm_or_ps(n_1, ((__m128)eight_biased_exponent));	267 const __m128 n_0 =

263 const __m128 n = _mm_sub_ps(n_0, ((__m128)implicit_leading_one));	268 _mm_or_ps(n_1, (reinterpret_cast<const __m128>(eight_biased_exponent)));

	269 const __m128 n =

	270 _mm_sub_ps(n_0, (reinterpret_cast<const __m128>(implicit_leading_one)));

264	271

265 // Compute y.	272 // Compute y.

266 static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {	273 static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {

267 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};	274 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};

268 static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {	275 static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {

269 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};	276 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};

270 const __m128 mantissa = _mm_and_ps(a, ((__m128)mantissa_mask));	277 const __m128 mantissa =

	278 _mm_and_ps(a, (reinterpret_cast<const __m128>(mantissa_mask)));

271 const __m128 y =	279 const __m128 y =

272 _mm_or_ps(mantissa, ((__m128)zero_biased_exponent_is_one));	280 _mm_or_ps(mantissa,

	281 (reinterpret_cast<const __m128>(zero_biased_exponent_is_one)));

273	282

274 // Approximate log2(y) ~= (y - 1) * pol5(y).	283 // Approximate log2(y) ~= (y - 1) * pol5(y).

275 // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0	284 // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0

276 static const ALIGN16_BEG float ALIGN16_END C5[4] = {	285 static const ALIGN16_BEG float ALIGN16_END C5[4] = {

277 -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};	286 -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};

278 static const ALIGN16_BEG float ALIGN16_END C4[4] = {	287 static const ALIGN16_BEG float ALIGN16_END C4[4] = {

279 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};	288 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};

280 static const ALIGN16_BEG float ALIGN16_END C3[4] = {	289 static const ALIGN16_BEG float ALIGN16_END C3[4] = {

281 -1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};	290 -1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};

282 static const ALIGN16_BEG float ALIGN16_END C2[4] = {2.5988452f, 2.5988452f,	291 static const ALIGN16_BEG float ALIGN16_END C2[4] = {2.5988452f, 2.5988452f,

283 2.5988452f, 2.5988452f};	292 2.5988452f, 2.5988452f};

284 static const ALIGN16_BEG float ALIGN16_END C1[4] = {	293 static const ALIGN16_BEG float ALIGN16_END C1[4] = {

285 -3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};	294 -3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};

286 static const ALIGN16_BEG float ALIGN16_END C0[4] = {3.1157899f, 3.1157899f,	295 static const ALIGN16_BEG float ALIGN16_END C0[4] = {3.1157899f, 3.1157899f,

287 3.1157899f, 3.1157899f};	296 3.1157899f, 3.1157899f};

288 const __m128 pol5_y_0 = _mm_mul_ps(y, ((__m128)C5));	297 const __m128 pol5_y_0 =

289 const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, ((__m128)C4));	298 _mm_mul_ps(y, (reinterpret_cast<const __m128>(C5)));

	299 const __m128 pol5_y_1 =

	300 _mm_add_ps(pol5_y_0, (reinterpret_cast<const __m128>(C4)));

290 const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);	301 const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);

291 const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, ((__m128)C3));	302 const __m128 pol5_y_3 =

	303 _mm_add_ps(pol5_y_2, (reinterpret_cast<const __m128>(C3)));

292 const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);	304 const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);

293 const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, ((__m128)C2));	305 const __m128 pol5_y_5 =

	306 _mm_add_ps(pol5_y_4, (reinterpret_cast<const __m128>(C2)));

294 const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);	307 const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);

295 const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, ((__m128)C1));	308 const __m128 pol5_y_7 =

	309 _mm_add_ps(pol5_y_6, (reinterpret_cast<const __m128>(C1)));

296 const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);	310 const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);

297 const __m128 pol5_y = _mm_add_ps(pol5_y_8, ((__m128)C0));	311 const __m128 pol5_y =

	312 _mm_add_ps(pol5_y_8, (reinterpret_cast<const __m128>(C0)));

298 const __m128 y_minus_one =	313 const __m128 y_minus_one =

299 _mm_sub_ps(y, ((__m128)zero_biased_exponent_is_one));	314 _mm_sub_ps(y,

	315 (reinterpret_cast<const __m128>(zero_biased_exponent_is_one)));

300 const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);	316 const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);

301	317

302 // Combine parts.	318 // Combine parts.

303 log2_a = _mm_add_ps(n, log2_y);	319 log2_a = _mm_add_ps(n, log2_y);

304 }	320 }

305	321

306 // b * log2(a)	322 // b * log2(a)

307 b_log2_a = _mm_mul_ps(b, log2_a);	323 b_log2_a = _mm_mul_ps(b, log2_a);

308	324

309 // Calculate exp2(x), x = b * log2(a).	325 // Calculate exp2(x), x = b * log2(a).

310 {	326 {

311 // To calculate 2^x, we decompose x like this:	327 // To calculate 2^x, we decompose x like this:

312 // x = n + y	328 // x = n + y

313 // n is an integer, the value of x - 0.5 rounded down, therefore	329 // n is an integer, the value of x - 0.5 rounded down, therefore

314 // y is in the [0.5, 1.5) range	330 // y is in the [0.5, 1.5) range

315 //	331 //

316 // 2^x = 2^n * 2^y	332 // 2^x = 2^n * 2^y

317 // 2^n can be evaluated by playing with float representation.	333 // 2^n can be evaluated by playing with float representation.

318 // 2^y in a small range can be approximated, this code uses an order two	334 // 2^y in a small range can be approximated, this code uses an order two

319 // polynomial approximation. The coefficients have been estimated	335 // polynomial approximation. The coefficients have been estimated

320 // with the Remez algorithm and the resulting polynomial has a	336 // with the Remez algorithm and the resulting polynomial has a

321 // maximum relative error of 0.17%.	337 // maximum relative error of 0.17%.

322	338

323 // To avoid over/underflow, we reduce the range of input to ]-127, 129].	339 // To avoid over/underflow, we reduce the range of input to ]-127, 129].

324 static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,	340 static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,

325 129.f, 129.f};	341 129.f, 129.f};

326 static const ALIGN16_BEG float min_input[4] ALIGN16_END = {	342 static const ALIGN16_BEG float min_input[4] ALIGN16_END = {

327 -126.99999f, -126.99999f, -126.99999f, -126.99999f};	343 -126.99999f, -126.99999f, -126.99999f, -126.99999f};

328 const __m128 x_min = _mm_min_ps(b_log2_a, ((__m128)max_input));	344 const __m128 x_min =

329 const __m128 x_max = _mm_max_ps(x_min, ((__m128)min_input));	345 _mm_min_ps(b_log2_a, (reinterpret_cast<const __m128>(max_input)));

	346 const __m128 x_max =

	347 _mm_max_ps(x_min, (reinterpret_cast<const __m128>(min_input)));

330 // Compute n.	348 // Compute n.

331 static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, 0.5f,	349 static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, 0.5f,

332 0.5f};	350 0.5f};

333 const __m128 x_minus_half = _mm_sub_ps(x_max, ((__m128)half));	351 const __m128 x_minus_half =

	352 _mm_sub_ps(x_max, (reinterpret_cast<const __m128>(half)));

334 const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);	353 const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);

335 // Compute 2^n.	354 // Compute 2^n.

336 static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {	355 static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {

337 127, 127, 127, 127};	356 127, 127, 127, 127};

338 static const int float_exponent_shift = 23;	357 static const int float_exponent_shift = 23;

339 const __m128i two_n_exponent =	358 const __m128i two_n_exponent =

340 _mm_add_epi32(x_minus_half_floor, ((__m128i)float_exponent_bias));	359 _mm_add_epi32(x_minus_half_floor,

	360 (reinterpret_cast<const __m128i>(float_exponent_bias)));

341 const __m128 two_n =	361 const __m128 two_n =

342 _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));	362 _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));

343 // Compute y.	363 // Compute y.

344 const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));	364 const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));

345 // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.	365 // Approximate 2^y ~= C2 * y^2 + C1 * y + C0.

346 static const ALIGN16_BEG float C2[4] ALIGN16_END = {	366 static const ALIGN16_BEG float C2[4] ALIGN16_END = {

347 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};	367 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};

348 static const ALIGN16_BEG float C1[4] ALIGN16_END = {	368 static const ALIGN16_BEG float C1[4] ALIGN16_END = {

349 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};	369 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};

350 static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,	370 static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,

351 1.0017247f, 1.0017247f};	371 1.0017247f, 1.0017247f};

352 const __m128 exp2_y_0 = _mm_mul_ps(y, ((__m128)C2));	372 const __m128 exp2_y_0 =

353 const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, ((__m128)C1));	373 _mm_mul_ps(y, (reinterpret_cast<const __m128>(C2)));

	374 const __m128 exp2_y_1 =

	375 _mm_add_ps(exp2_y_0, (reinterpret_cast<const __m128>(C1)));

354 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);	376 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);

355 const __m128 exp2_y = _mm_add_ps(exp2_y_2, ((__m128)C0));	377 const __m128 exp2_y =

	378 _mm_add_ps(exp2_y_2, (reinterpret_cast<const __m128>(C0)));

356	379

357 // Combine parts.	380 // Combine parts.

358 a_exp_b = _mm_mul_ps(exp2_y, two_n);	381 a_exp_b = _mm_mul_ps(exp2_y, two_n);

359 }	382 }

360 return a_exp_b;	383 return a_exp_b;

361 }	384 }

362	385

363 static void OverdriveAndSuppressSSE2(AecCore* aec,	386 static void OverdriveAndSuppressSSE2(AecCore* aec,

364 float hNl[PART_LEN1],	387 float hNl[PART_LEN1],

365 const float hNlFb,	388 const float hNlFb,

(...skipping 346 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
712 void WebRtcAec_InitAec_SSE2(void) {	735 void WebRtcAec_InitAec_SSE2(void) {

713 WebRtcAec_FilterFar = FilterFarSSE2;	736 WebRtcAec_FilterFar = FilterFarSSE2;

714 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;	737 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;

715 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;	738 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;

716 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;	739 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;

717 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;	740 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;

718 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;	741 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;

719 WebRtcAec_PartitionDelay = PartitionDelaySSE2;	742 WebRtcAec_PartitionDelay = PartitionDelaySSE2;

720 WebRtcAec_WindowData = WindowDataSSE2;	743 WebRtcAec_WindowData = WindowDataSSE2;

721 }	744 }

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_sse2.c ('k') | webrtc/modules/audio_processing/aec/echo_cancellation.h » ('j') | no next file with comments »