| Index: webrtc/modules/audio_processing/aec3/matched_filter.cc | 
| diff --git a/webrtc/modules/audio_processing/aec3/matched_filter.cc b/webrtc/modules/audio_processing/aec3/matched_filter.cc | 
| index 4c6e0d70525ad9eba85cb52043d49b82751af999..7bb5778999879f91012641f82f72c491b1df11c4 100644 | 
| --- a/webrtc/modules/audio_processing/aec3/matched_filter.cc | 
| +++ b/webrtc/modules/audio_processing/aec3/matched_filter.cc | 
| @@ -9,9 +9,6 @@ | 
| */ | 
| #include "webrtc/modules/audio_processing/aec3/matched_filter.h" | 
|  | 
| -#if defined(WEBRTC_HAS_NEON) | 
| -#include <arm_neon.h> | 
| -#endif | 
| #include "webrtc/typedefs.h" | 
| #if defined(WEBRTC_ARCH_X86_FAMILY) | 
| #include <emmintrin.h> | 
| @@ -24,114 +21,6 @@ | 
|  | 
| namespace webrtc { | 
| namespace aec3 { | 
| - | 
| -#if defined(WEBRTC_HAS_NEON) | 
| - | 
| -void MatchedFilterCore_NEON(size_t x_start_index, | 
| -                            float x2_sum_threshold, | 
| -                            rtc::ArrayView<const float> x, | 
| -                            rtc::ArrayView<const float> y, | 
| -                            rtc::ArrayView<float> h, | 
| -                            bool* filters_updated, | 
| -                            float* error_sum) { | 
| -  const int h_size = static_cast<int>(h.size()); | 
| -  const int x_size = static_cast<int>(x.size()); | 
| -  RTC_DCHECK_EQ(0, h_size % 4); | 
| - | 
| -  // Process for all samples in the sub-block. | 
| -  for (size_t i = 0; i < kSubBlockSize; ++i) { | 
| -    // Apply the matched filter as filter * x, and compute x * x. | 
| - | 
| -    RTC_DCHECK_GT(x_size, x_start_index); | 
| -    const float* x_p = &x[x_start_index]; | 
| -    const float* h_p = &h[0]; | 
| - | 
| -    // Initialize values for the accumulation. | 
| -    float32x4_t s_128 = vdupq_n_f32(0); | 
| -    float32x4_t x2_sum_128 = vdupq_n_f32(0); | 
| -    float x2_sum = 0.f; | 
| -    float s = 0; | 
| - | 
| -    // Compute loop chunk sizes until, and after, the wraparound of the circular | 
| -    // buffer for x. | 
| -    const int chunk1 = | 
| -        std::min(h_size, static_cast<int>(x_size - x_start_index)); | 
| - | 
| -    // Perform the loop in two chunks. | 
| -    const int chunk2 = h_size - chunk1; | 
| -    for (int limit : {chunk1, chunk2}) { | 
| -      // Perform 128 bit vector operations. | 
| -      const int limit_by_4 = limit >> 2; | 
| -      for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { | 
| -        // Load the data into 128 bit vectors. | 
| -        const float32x4_t x_k = vld1q_f32(x_p); | 
| -        const float32x4_t h_k = vld1q_f32(h_p); | 
| -        // Compute and accumulate x * x and h * x. | 
| -        x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k); | 
| -        s_128 = vmlaq_f32(s_128, h_k, x_k); | 
| -      } | 
| - | 
| -      // Perform non-vector operations for any remaining items. | 
| -      for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { | 
| -        const float x_k = *x_p; | 
| -        x2_sum += x_k * x_k; | 
| -        s += *h_p * x_k; | 
| -      } | 
| - | 
| -      x_p = &x[0]; | 
| -    } | 
| - | 
| -    // Combine the accumulated vector and scalar values. | 
| -    float* v = reinterpret_cast<float*>(&x2_sum_128); | 
| -    x2_sum += v[0] + v[1] + v[2] + v[3]; | 
| -    v = reinterpret_cast<float*>(&s_128); | 
| -    s += v[0] + v[1] + v[2] + v[3]; | 
| - | 
| -    // Compute the matched filter error. | 
| -    const float e = std::min(32767.f, std::max(-32768.f, y[i] - s)); | 
| -    *error_sum += e * e; | 
| - | 
| -    // Update the matched filter estimate in an NLMS manner. | 
| -    if (x2_sum > x2_sum_threshold) { | 
| -      RTC_DCHECK_LT(0.f, x2_sum); | 
| -      const float alpha = 0.7f * e / x2_sum; | 
| -      const float32x4_t alpha_128 = vmovq_n_f32(alpha); | 
| - | 
| -      // filter = filter + 0.7 * (y - filter * x) / x * x. | 
| -      float* h_p = &h[0]; | 
| -      x_p = &x[x_start_index]; | 
| - | 
| -      // Perform the loop in two chunks. | 
| -      for (int limit : {chunk1, chunk2}) { | 
| -        // Perform 128 bit vector operations. | 
| -        const int limit_by_4 = limit >> 2; | 
| -        for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { | 
| -          // Load the data into 128 bit vectors. | 
| -          float32x4_t h_k = vld1q_f32(h_p); | 
| -          const float32x4_t x_k = vld1q_f32(x_p); | 
| -          // Compute h = h + alpha * x. | 
| -          h_k = vmlaq_f32(h_k, alpha_128, x_k); | 
| - | 
| -          // Store the result. | 
| -          vst1q_f32(h_p, h_k); | 
| -        } | 
| - | 
| -        // Perform non-vector operations for any remaining items. | 
| -        for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { | 
| -          *h_p += alpha * *x_p; | 
| -        } | 
| - | 
| -        x_p = &x[0]; | 
| -      } | 
| - | 
| -      *filters_updated = true; | 
| -    } | 
| - | 
| -    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; | 
| -  } | 
| -} | 
| - | 
| -#endif | 
|  | 
| #if defined(WEBRTC_ARCH_X86_FAMILY) | 
|  | 
| @@ -338,13 +227,6 @@ | 
| &filters_updated, &error_sum); | 
| break; | 
| #endif | 
| -#if defined(WEBRTC_HAS_NEON) | 
| -      case Aec3Optimization::kNeon: | 
| -        aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold, | 
| -                                     render_buffer.buffer, y, filters_[n], | 
| -                                     &filters_updated, &error_sum); | 
| -        break; | 
| -#endif | 
| default: | 
| aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, | 
| render_buffer.buffer, y, filters_[n], | 
|  |