Index: webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc |
diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc |
index 7c29558c7e2c988079af5ad4554fe3b87369867d..3174fa762e88556d0a129a712e3a769392525c3d 100644 |
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc |
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc |
@@ -10,9 +10,6 @@ |
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h" |
-#if defined(WEBRTC_HAS_NEON) |
-#include <arm_neon.h> |
-#endif |
#include "webrtc/typedefs.h" |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
#include <emmintrin.h> |
@@ -54,26 +51,6 @@ |
[](float a, float b) { return a * a + b * b; }); |
} |
} |
- |
-#if defined(WEBRTC_HAS_NEON) |
-// Computes and stores the frequency response of the filter. |
-void UpdateFrequencyResponse_NEON( |
- rtc::ArrayView<const FftData> H, |
- std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) { |
- RTC_DCHECK_EQ(H.size(), H2->size()); |
- for (size_t k = 0; k < H.size(); ++k) { |
- for (size_t j = 0; j < kFftLengthBy2; j += 4) { |
- const float32x4_t re = vld1q_f32(&H[k].re[j]); |
- const float32x4_t im = vld1q_f32(&H[k].im[j]); |
- float32x4_t H2_k_j = vmulq_f32(re, re); |
- H2_k_j = vmlaq_f32(H2_k_j, im, im); |
- vst1q_f32(&(*H2)[k][j], H2_k_j); |
- } |
- (*H2)[k][kFftLengthBy2] = H[k].re[kFftLengthBy2] * H[k].re[kFftLengthBy2] + |
- H[k].im[kFftLengthBy2] * H[k].im[kFftLengthBy2]; |
- } |
-} |
-#endif |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
// Computes and stores the frequency response of the filter. |
@@ -108,25 +85,6 @@ |
} |
} |
-#if defined(WEBRTC_HAS_NEON) |
-// Computes and stores the echo return loss estimate of the filter, which is the |
-// sum of the partition frequency responses. |
-void UpdateErlEstimator_NEON( |
- const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, |
- std::array<float, kFftLengthBy2Plus1>* erl) { |
- erl->fill(0.f); |
- for (auto& H2_j : H2) { |
- for (size_t k = 0; k < kFftLengthBy2; k += 4) { |
- const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]); |
- float32x4_t erl_k = vld1q_f32(&(*erl)[k]); |
- erl_k = vaddq_f32(erl_k, H2_j_k); |
- vst1q_f32(&(*erl)[k], erl_k); |
- } |
- (*erl)[kFftLengthBy2] += H2_j[kFftLengthBy2]; |
- } |
-} |
-#endif |
- |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
// Computes and stores the echo return loss estimate of the filter, which is the |
// sum of the partition frequency responses. |
@@ -162,63 +120,6 @@ |
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; |
} |
} |
- |
-#if defined(WEBRTC_HAS_NEON) |
-// Adapts the filter partitions. (NEON variant) |
-void AdaptPartitions_NEON(const RenderBuffer& render_buffer, |
- const FftData& G, |
- rtc::ArrayView<FftData> H) { |
- rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); |
- const int lim1 = |
- std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); |
- const int lim2 = H.size(); |
- constexpr int kNumFourBinBands = kFftLengthBy2 / 4; |
- FftData* H_j = &H[0]; |
- const FftData* X = &render_buffer_data[render_buffer.Position()]; |
- int limit = lim1; |
- int j = 0; |
- do { |
- for (; j < limit; ++j, ++H_j, ++X) { |
- for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { |
- const float32x4_t G_re = vld1q_f32(&G.re[k]); |
- const float32x4_t G_im = vld1q_f32(&G.im[k]); |
- const float32x4_t X_re = vld1q_f32(&X->re[k]); |
- const float32x4_t X_im = vld1q_f32(&X->im[k]); |
- const float32x4_t H_re = vld1q_f32(&H_j->re[k]); |
- const float32x4_t H_im = vld1q_f32(&H_j->im[k]); |
- const float32x4_t a = vmulq_f32(X_re, G_re); |
- const float32x4_t e = vmlaq_f32(a, X_im, G_im); |
- const float32x4_t c = vmulq_f32(X_re, G_im); |
- const float32x4_t f = vmlsq_f32(c, X_im, G_re); |
- const float32x4_t g = vaddq_f32(H_re, e); |
- const float32x4_t h = vaddq_f32(H_im, f); |
- |
- vst1q_f32(&H_j->re[k], g); |
- vst1q_f32(&H_j->im[k], h); |
- } |
- } |
- |
- X = &render_buffer_data[0]; |
- limit = lim2; |
- } while (j < lim2); |
- |
- H_j = &H[0]; |
- X = &render_buffer_data[render_buffer.Position()]; |
- limit = lim1; |
- j = 0; |
- do { |
- for (; j < limit; ++j, ++H_j, ++X) { |
- H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] + |
- X->im[kFftLengthBy2] * G.im[kFftLengthBy2]; |
- H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] - |
- X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; |
- } |
- |
- X = &render_buffer_data[0]; |
- limit = lim2; |
- } while (j < lim2); |
-} |
-#endif |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
// Adapts the filter partitions. (SSE2 variant) |
@@ -301,65 +202,6 @@ |
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; |
} |
} |
- |
-#if defined(WEBRTC_HAS_NEON) |
-// Produces the filter output (NEON variant). |
-void ApplyFilter_NEON(const RenderBuffer& render_buffer, |
- rtc::ArrayView<const FftData> H, |
- FftData* S) { |
- RTC_DCHECK_GE(H.size(), H.size() - 1); |
- S->re.fill(0.f); |
- S->im.fill(0.f); |
- |
- rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); |
- const int lim1 = |
- std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); |
- const int lim2 = H.size(); |
- constexpr int kNumFourBinBands = kFftLengthBy2 / 4; |
- const FftData* H_j = &H[0]; |
- const FftData* X = &render_buffer_data[render_buffer.Position()]; |
- |
- int j = 0; |
- int limit = lim1; |
- do { |
- for (; j < limit; ++j, ++H_j, ++X) { |
- for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { |
- const float32x4_t X_re = vld1q_f32(&X->re[k]); |
- const float32x4_t X_im = vld1q_f32(&X->im[k]); |
- const float32x4_t H_re = vld1q_f32(&H_j->re[k]); |
- const float32x4_t H_im = vld1q_f32(&H_j->im[k]); |
- const float32x4_t S_re = vld1q_f32(&S->re[k]); |
- const float32x4_t S_im = vld1q_f32(&S->im[k]); |
- const float32x4_t a = vmulq_f32(X_re, H_re); |
- const float32x4_t e = vmlsq_f32(a, X_im, H_im); |
- const float32x4_t c = vmulq_f32(X_re, H_im); |
- const float32x4_t f = vmlaq_f32(c, X_im, H_re); |
- const float32x4_t g = vaddq_f32(S_re, e); |
- const float32x4_t h = vaddq_f32(S_im, f); |
- vst1q_f32(&S->re[k], g); |
- vst1q_f32(&S->im[k], h); |
- } |
- } |
- limit = lim2; |
- X = &render_buffer_data[0]; |
- } while (j < lim2); |
- |
- H_j = &H[0]; |
- X = &render_buffer_data[render_buffer.Position()]; |
- j = 0; |
- limit = lim1; |
- do { |
- for (; j < limit; ++j, ++H_j, ++X) { |
- S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - |
- X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; |
- S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + |
- X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; |
- } |
- limit = lim2; |
- X = &render_buffer_data[0]; |
- } while (j < lim2); |
-} |
-#endif |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
// Produces the filter output (SSE2 variant). |
@@ -464,11 +306,6 @@ |
aec3::ApplyFilter_SSE2(render_buffer, H_, S); |
break; |
#endif |
-#if defined(WEBRTC_HAS_NEON) |
- case Aec3Optimization::kNeon: |
- aec3::ApplyFilter_NEON(render_buffer, H_, S); |
- break; |
-#endif |
default: |
aec3::ApplyFilter(render_buffer, H_, S); |
} |
@@ -481,11 +318,6 @@ |
#if defined(WEBRTC_ARCH_X86_FAMILY) |
case Aec3Optimization::kSse2: |
aec3::AdaptPartitions_SSE2(render_buffer, G, H_); |
- break; |
-#endif |
-#if defined(WEBRTC_HAS_NEON) |
- case Aec3Optimization::kNeon: |
- aec3::AdaptPartitions_NEON(render_buffer, G, H_); |
break; |
#endif |
default: |
@@ -506,12 +338,6 @@ |
aec3::UpdateErlEstimator_SSE2(H2_, &erl_); |
break; |
#endif |
-#if defined(WEBRTC_HAS_NEON) |
- case Aec3Optimization::kNeon: |
- aec3::UpdateFrequencyResponse_NEON(H_, &H2_); |
- aec3::UpdateErlEstimator_NEON(H2_, &erl_); |
- break; |
-#endif |
default: |
aec3::UpdateFrequencyResponse(H_, &H2_); |
aec3::UpdateErlEstimator(H2_, &erl_); |