Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Unified Diff: webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc

Issue 2348213002: Move the aec_rdft* files to a more proper place beneath APM and make them thread-safe. (Closed)
Patch Set: Rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
similarity index 85%
rename from webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc
rename to webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
index b4e453ff53ea2efdeb6888c5c1b22e3a92df7d16..03f6b31f0ff91799593a13e9ebcc7bf085a0ac1f 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc
+++ b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc
@@ -8,14 +8,32 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft.h"
#include <emmintrin.h>
-static const ALIGN16_BEG float ALIGN16_END
- k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
-static void cft1st_128_SSE2(float* a) {
+namespace webrtc {
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+namespace {
+// These intrinsics were unavailable before VS 2008.
+// TODO(andrew): move to a common file.
+#if defined(_MSC_VER) && _MSC_VER < 1500
+static __inline __m128 _mm_castsi128_ps(__m128i a) {
+ return *(__m128*)&a;
+}
+static __inline __m128i _mm_castps_si128(__m128 a) {
+ return *(__m128i*)&a;
+}
+#endif
+
+} // namespace
+
+void cft1st_128_SSE2(float* a) {
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j, k2;
@@ -78,7 +96,7 @@ static void cft1st_128_SSE2(float* a) {
}
}
-static void cftmdl_128_SSE2(float* a) {
+void cftmdl_128_SSE2(float* a) {
const int l = 8;
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j0;
@@ -89,12 +107,12 @@ static void cftmdl_128_SSE2(float* a) {
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
- const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
- _mm_castsi128_ps(a_32),
- _MM_SHUFFLE(1, 0, 1, 0));
- const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
- _mm_castsi128_ps(a_40),
- _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_00_32 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
@@ -102,12 +120,12 @@ static void cftmdl_128_SSE2(float* a) {
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
- const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
- _mm_castsi128_ps(a_48),
- _MM_SHUFFLE(1, 0, 1, 0));
- const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
- _mm_castsi128_ps(a_56),
- _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_16_48 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
@@ -163,12 +181,12 @@ static void cftmdl_128_SSE2(float* a) {
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
- const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
- _mm_castsi128_ps(a_32),
- _MM_SHUFFLE(1, 0, 1, 0));
- const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
- _mm_castsi128_ps(a_40),
- _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_00_32 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_08_40 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40),
+ _MM_SHUFFLE(1, 0, 1, 0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
@@ -176,22 +194,21 @@ static void cftmdl_128_SSE2(float* a) {
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
- const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
- _mm_castsi128_ps(a_48),
- _MM_SHUFFLE(1, 0, 1, 0));
- const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
- _mm_castsi128_ps(a_56),
- _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_16_48 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48),
+ _MM_SHUFFLE(1, 0, 1, 0));
+ const __m128 a_24_56 =
+ _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56),
+ _MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
- const __m128 xx3 =
- _mm_mul_ps(wk2iv,
- _mm_castsi128_ps(_mm_shuffle_epi32(
- _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
+ const __m128 xx3 = _mm_mul_ps(
+ wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
+ _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
@@ -202,16 +219,14 @@ static void cftmdl_128_SSE2(float* a) {
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(
- wk1iv,
- _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
- _MM_SHUFFLE(2, 3, 0, 1))));
+ wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
+ _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(
- wk3iv,
- _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
- _MM_SHUFFLE(2, 3, 0, 1))));
+ wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
+ _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
@@ -237,13 +252,13 @@ static void cftmdl_128_SSE2(float* a) {
}
}
-static void rftfsub_128_SSE2(float* a) {
+void rftfsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
- static const ALIGN16_BEG float ALIGN16_END
- k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+ 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
// Vectorized code (four at once).
@@ -327,13 +342,13 @@ static void rftfsub_128_SSE2(float* a) {
}
}
-static void rftbsub_128_SSE2(float* a) {
+void rftbsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
- static const ALIGN16_BEG float ALIGN16_END
- k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+ static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f,
+ 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
a[1] = -a[1];
@@ -418,10 +433,6 @@ static void rftbsub_128_SSE2(float* a) {
}
a[65] = -a[65];
}
+#endif
-void aec_rdft_init_sse2(void) {
- cft1st_128 = cft1st_128_SSE2;
- cftmdl_128 = cftmdl_128_SSE2;
- rftfsub_128 = rftfsub_128_SSE2;
- rftbsub_128 = rftbsub_128_SSE2;
-}
+} // namespace webrtc

Powered by Google App Engine
This is Rietveld 408576698