Chromium Code Reviews

Side by Side Diff: webrtc/modules/audio_processing/aec3/vector_math.h

Issue 2813823002: Adding new functionality for SIMD optimizations in AEC3 (Closed)
Patch Set: Fixed build error on windows Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff |
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
13
14 #include "webrtc/typedefs.h"
15 #if defined(WEBRTC_ARCH_X86_FAMILY)
16 #include <emmintrin.h>
17 #endif
18 #include <math.h>
19 #include <algorithm>
20 #include <array>
21 #include <functional>
22
23 #include "webrtc/base/array_view.h"
24 #include "webrtc/base/checks.h"
25 #include "webrtc/modules/audio_processing/aec3/aec3_common.h"
26
27 namespace webrtc {
28 namespace aec3 {
29
30 // Provides optimizations for mathematical operations based on vectors.
31 class VectorMath {
32 public:
33 explicit VectorMath(Aec3Optimization optimization)
34 : optimization_(optimization) {}
35
36 // Elementwise square root.
37 void Sqrt(rtc::ArrayView<float> x) {
38 switch (optimization_) {
39 #if defined(WEBRTC_ARCH_X86_FAMILY)
40 case Aec3Optimization::kSse2: {
41 const int x_size = static_cast<int>(x.size());
42 const int vector_limit = x_size >> 2;
43
44 int j = 0;
45 for (; j < vector_limit * 4; j += 4) {
46 __m128 g = _mm_loadu_ps(&x[j]);
47 g = _mm_sqrt_ps(g);
48 _mm_storeu_ps(&x[j], g);
49 }
50
51 for (; j < x_size; ++j) {
52 x[j] = sqrtf(x[j]);
53 }
54 } break;
55 #endif
56 default:
57 std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); });
58 }
59 }
60
61 // Elementwise vector multiplication z = x * y.
62 void Multiply(rtc::ArrayView<const float> x,
63 rtc::ArrayView<const float> y,
64 rtc::ArrayView<float> z) {
65 RTC_DCHECK_EQ(z.size(), x.size());
66 RTC_DCHECK_EQ(z.size(), y.size());
67 switch (optimization_) {
68 #if defined(WEBRTC_ARCH_X86_FAMILY)
69 case Aec3Optimization::kSse2: {
70 const int x_size = static_cast<int>(x.size());
71 const int vector_limit = x_size >> 2;
72
73 int j = 0;
74 for (; j < vector_limit * 4; j += 4) {
75 const __m128 x_j = _mm_loadu_ps(&x[j]);
76 const __m128 y_j = _mm_loadu_ps(&y[j]);
77 const __m128 z_j = _mm_mul_ps(x_j, y_j);
78 _mm_storeu_ps(&z[j], z_j);
79 }
80
81 for (; j < x_size; ++j) {
82 z[j] = x[j] * y[j];
83 }
84 } break;
85 #endif
86 default:
87 std::transform(x.begin(), x.end(), y.begin(), z.begin(),
88 std::multiplies<float>());
89 }
90 }
91
92 // Elementwise vector accumulation z += x.
93 void Accumulate(rtc::ArrayView<const float> x, rtc::ArrayView<float> z) {
94 RTC_DCHECK_EQ(z.size(), x.size());
95 switch (optimization_) {
96 #if defined(WEBRTC_ARCH_X86_FAMILY)
97 case Aec3Optimization::kSse2: {
98 const int x_size = static_cast<int>(x.size());
99 const int vector_limit = x_size >> 2;
100
101 int j = 0;
102 for (; j < vector_limit * 4; j += 4) {
103 const __m128 x_j = _mm_loadu_ps(&x[j]);
104 __m128 z_j = _mm_loadu_ps(&z[j]);
105 z_j = _mm_add_ps(x_j, z_j);
106 _mm_storeu_ps(&z[j], z_j);
107 }
108
109 for (; j < x_size; ++j) {
110 z[j] += x[j];
111 }
112 } break;
113 #endif
114 default:
115 std::transform(x.begin(), x.end(), z.begin(), z.begin(),
116 std::plus<float>());
117 }
118 }
119
120 private:
121 Aec3Optimization optimization_;
122 };
123
124 } // namespace aec3
125
126 } // namespace webrtc
127
128 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
OLDNEW

Powered by Google App Engine