webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc - Issue 2348213002: Move the aec_rdft* files to a more proper place beneath APM and make them thread-safe.

Unified Diff: webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

Issue 2348213002: Move the aec_rdft* files to a more proper place beneath APM and make them thread-safe. (Closed)

Patch Set: Rebase. Created 4 years, 2 months ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « webrtc/modules/audio_processing/utility/ooura_fft_neon.cc ('k') | webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

similarity index 85%

rename from webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc

rename to webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

index b4e453ff53ea2efdeb6888c5c1b22e3a92df7d16..03f6b31f0ff91799593a13e9ebcc7bf085a0ac1f 100644

--- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc

+++ b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

@@ -8,14 +8,32 @@

* be found in the AUTHORS file in the root of the source tree.

-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"

+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"

#include <emmintrin.h>

-static const ALIGN16_BEG float ALIGN16_END

- k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};

+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"

+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"

-static void cft1st_128_SSE2(float* a) {

+namespace webrtc {

+#if defined(WEBRTC_ARCH_X86_FAMILY)

+namespace {

+// These intrinsics were unavailable before VS 2008.

+// TODO(andrew): move to a common file.

+#if defined(_MSC_VER) && _MSC_VER < 1500

+static __inline __m128 _mm_castsi128_ps(__m128i a) {

+ return *(__m128*)&a;

+}

+static __inline __m128i _mm_castps_si128(__m128 a) {

+ return *(__m128i*)&a;

+}

+#endif

+} // namespace

+void cft1st_128_SSE2(float* a) {

const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);

int j, k2;

@@ -78,7 +96,7 @@ static void cft1st_128_SSE2(float* a) {

}

-static void cftmdl_128_SSE2(float* a) {

+void cftmdl_128_SSE2(float* a) {

const int l = 8;

const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);

int j0;

@@ -89,12 +107,12 @@ static void cftmdl_128_SSE2(float* a) {

const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);

const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);

const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);

- const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),

- _mm_castsi128_ps(a_32),

- _MM_SHUFFLE(1, 0, 1, 0));

- const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),

- _mm_castsi128_ps(a_40),

- _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_00_32 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),

+ _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_08_40 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),

+ _MM_SHUFFLE(1, 0, 1, 0));

__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);

const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);

@@ -102,12 +120,12 @@ static void cftmdl_128_SSE2(float* a) {

const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);

const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);

const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);

- const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),

- _mm_castsi128_ps(a_48),

- _MM_SHUFFLE(1, 0, 1, 0));

- const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),

- _mm_castsi128_ps(a_56),

- _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_16_48 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),

+ _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_24_56 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),

+ _MM_SHUFFLE(1, 0, 1, 0));

const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);

const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);

@@ -163,12 +181,12 @@ static void cftmdl_128_SSE2(float* a) {

const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);

const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);

const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);

- const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),

- _mm_castsi128_ps(a_32),

- _MM_SHUFFLE(1, 0, 1, 0));

- const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),

- _mm_castsi128_ps(a_40),

- _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_00_32 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),

+ _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_08_40 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),

+ _MM_SHUFFLE(1, 0, 1, 0));

__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);

const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);

@@ -176,22 +194,21 @@ static void cftmdl_128_SSE2(float* a) {

const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);

const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);

const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);

- const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),

- _mm_castsi128_ps(a_48),

- _MM_SHUFFLE(1, 0, 1, 0));

- const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),

- _mm_castsi128_ps(a_56),

- _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_16_48 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),

+ _MM_SHUFFLE(1, 0, 1, 0));

+ const __m128 a_24_56 =

+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),

+ _MM_SHUFFLE(1, 0, 1, 0));

const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);

const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);

const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);

const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);

const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);

- const __m128 xx3 =

- _mm_mul_ps(wk2iv,

- _mm_castsi128_ps(_mm_shuffle_epi32(

- _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));

+ const __m128 xx3 = _mm_mul_ps(

+ wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),

+ _MM_SHUFFLE(2, 3, 0, 1))));

const __m128 xx4 = _mm_add_ps(xx2, xx3);

const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(

@@ -202,16 +219,14 @@ static void cftmdl_128_SSE2(float* a) {

const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);

const __m128 xx11 = _mm_mul_ps(

- wk1iv,

- _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),

- _MM_SHUFFLE(2, 3, 0, 1))));

+ wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),

+ _MM_SHUFFLE(2, 3, 0, 1))));

const __m128 xx12 = _mm_add_ps(xx10, xx11);

const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);

const __m128 xx21 = _mm_mul_ps(

- wk3iv,

- _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),

- _MM_SHUFFLE(2, 3, 0, 1))));

+ wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),

+ _MM_SHUFFLE(2, 3, 0, 1))));

const __m128 xx22 = _mm_add_ps(xx20, xx21);

_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));

@@ -237,13 +252,13 @@ static void cftmdl_128_SSE2(float* a) {

}

-static void rftfsub_128_SSE2(float* a) {

+void rftfsub_128_SSE2(float* a) {

const float* c = rdft_w + 32;

int j1, j2, k1, k2;

float wkr, wki, xr, xi, yr, yi;

- static const ALIGN16_BEG float ALIGN16_END

- k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};

+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,

+ 0.5f};

const __m128 mm_half = _mm_load_ps(k_half);

// Vectorized code (four at once).

@@ -327,13 +342,13 @@ static void rftfsub_128_SSE2(float* a) {

}

-static void rftbsub_128_SSE2(float* a) {

+void rftbsub_128_SSE2(float* a) {

const float* c = rdft_w + 32;

int j1, j2, k1, k2;

float wkr, wki, xr, xi, yr, yi;

- static const ALIGN16_BEG float ALIGN16_END

- k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};

+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,

+ 0.5f};

const __m128 mm_half = _mm_load_ps(k_half);

a[1] = -a[1];

@@ -418,10 +433,6 @@ static void rftbsub_128_SSE2(float* a) {

}

a[65] = -a[65];

}

+#endif

-void aec_rdft_init_sse2(void) {

- cft1st_128 = cft1st_128_SSE2;

- cftmdl_128 = cftmdl_128_SSE2;

- rftfsub_128 = rftfsub_128_SSE2;

- rftbsub_128 = rftbsub_128_SSE2;

+} // namespace webrtc