| OLD | NEW | 
| (Empty) |  | 
 |    1 /* | 
 |    2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. | 
 |    3  * | 
 |    4  *  Use of this source code is governed by a BSD-style license | 
 |    5  *  that can be found in the LICENSE file in the root of the source | 
 |    6  *  tree. An additional intellectual property rights grant can be found | 
 |    7  *  in the file PATENTS.  All contributing project authors may | 
 |    8  *  be found in the AUTHORS file in the root of the source tree. | 
 |    9  */ | 
 |   10  | 
 |   11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ | 
 |   12 #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ | 
 |   13  | 
 |   14 #include "webrtc/typedefs.h" | 
 |   15 #if defined(WEBRTC_ARCH_X86_FAMILY) | 
 |   16 #include <emmintrin.h> | 
 |   17 #endif | 
 |   18 #include <math.h> | 
 |   19 #include <algorithm> | 
 |   20 #include <array> | 
 |   21 #include <functional> | 
 |   22  | 
 |   23 #include "webrtc/base/array_view.h" | 
 |   24 #include "webrtc/base/checks.h" | 
 |   25 #include "webrtc/modules/audio_processing/aec3/aec3_common.h" | 
 |   26  | 
 |   27 namespace webrtc { | 
 |   28 namespace aec3 { | 
 |   29  | 
 |   30 // Provides optimizations for mathematical operations based on vectors. | 
 |   31 class VectorMath { | 
 |   32  public: | 
 |   33   explicit VectorMath(Aec3Optimization optimization) | 
 |   34       : optimization_(optimization) {} | 
 |   35  | 
 |   36   // Elementwise square root. | 
 |   37   void Sqrt(rtc::ArrayView<float> x) { | 
 |   38     switch (optimization_) { | 
 |   39 #if defined(WEBRTC_ARCH_X86_FAMILY) | 
 |   40       case Aec3Optimization::kSse2: { | 
 |   41         const int x_size = static_cast<int>(x.size()); | 
 |   42         const int vector_limit = x_size >> 2; | 
 |   43  | 
 |   44         int j = 0; | 
 |   45         for (; j < vector_limit * 4; j += 4) { | 
 |   46           __m128 g = _mm_loadu_ps(&x[j]); | 
 |   47           g = _mm_sqrt_ps(g); | 
 |   48           _mm_storeu_ps(&x[j], g); | 
 |   49         } | 
 |   50  | 
 |   51         for (; j < x_size; ++j) { | 
 |   52           x[j] = sqrtf(x[j]); | 
 |   53         } | 
 |   54       } break; | 
 |   55 #endif | 
 |   56       default: | 
 |   57         std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); }); | 
 |   58     } | 
 |   59   } | 
 |   60  | 
 |   61   // Elementwise vector multiplication z = x * y. | 
 |   62   void Multiply(rtc::ArrayView<const float> x, | 
 |   63                 rtc::ArrayView<const float> y, | 
 |   64                 rtc::ArrayView<float> z) { | 
 |   65     RTC_DCHECK_EQ(z.size(), x.size()); | 
 |   66     RTC_DCHECK_EQ(z.size(), y.size()); | 
 |   67     switch (optimization_) { | 
 |   68 #if defined(WEBRTC_ARCH_X86_FAMILY) | 
 |   69       case Aec3Optimization::kSse2: { | 
 |   70         const int x_size = static_cast<int>(x.size()); | 
 |   71         const int vector_limit = x_size >> 2; | 
 |   72  | 
 |   73         int j = 0; | 
 |   74         for (; j < vector_limit * 4; j += 4) { | 
 |   75           const __m128 x_j = _mm_loadu_ps(&x[j]); | 
 |   76           const __m128 y_j = _mm_loadu_ps(&y[j]); | 
 |   77           const __m128 z_j = _mm_mul_ps(x_j, y_j); | 
 |   78           _mm_storeu_ps(&z[j], z_j); | 
 |   79         } | 
 |   80  | 
 |   81         for (; j < x_size; ++j) { | 
 |   82           z[j] = x[j] * y[j]; | 
 |   83         } | 
 |   84       } break; | 
 |   85 #endif | 
 |   86       default: | 
 |   87         std::transform(x.begin(), x.end(), y.begin(), z.begin(), | 
 |   88                        std::multiplies<float>()); | 
 |   89     } | 
 |   90   } | 
 |   91  | 
 |   92   // Elementwise vector accumulation z += x. | 
 |   93   void Accumulate(rtc::ArrayView<const float> x, rtc::ArrayView<float> z) { | 
 |   94     RTC_DCHECK_EQ(z.size(), x.size()); | 
 |   95     switch (optimization_) { | 
 |   96 #if defined(WEBRTC_ARCH_X86_FAMILY) | 
 |   97       case Aec3Optimization::kSse2: { | 
 |   98         const int x_size = static_cast<int>(x.size()); | 
 |   99         const int vector_limit = x_size >> 2; | 
 |  100  | 
 |  101         int j = 0; | 
 |  102         for (; j < vector_limit * 4; j += 4) { | 
 |  103           const __m128 x_j = _mm_loadu_ps(&x[j]); | 
 |  104           __m128 z_j = _mm_loadu_ps(&z[j]); | 
 |  105           z_j = _mm_add_ps(x_j, z_j); | 
 |  106           _mm_storeu_ps(&z[j], z_j); | 
 |  107         } | 
 |  108  | 
 |  109         for (; j < x_size; ++j) { | 
 |  110           z[j] += x[j]; | 
 |  111         } | 
 |  112       } break; | 
 |  113 #endif | 
 |  114       default: | 
 |  115         std::transform(x.begin(), x.end(), z.begin(), z.begin(), | 
 |  116                        std::plus<float>()); | 
 |  117     } | 
 |  118   } | 
 |  119  | 
 |  120  private: | 
 |  121   Aec3Optimization optimization_; | 
 |  122 }; | 
 |  123  | 
 |  124 }  // namespace aec3 | 
 |  125  | 
 |  126 }  // namespace webrtc | 
 |  127  | 
 |  128 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ | 
| OLD | NEW |