Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Unified Diff: webrtc/modules/audio_processing/aec3/vector_math.h

Issue 2813823002: Adding new functionality for SIMD optimizations in AEC3 (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/aec3/vector_math.h
diff --git a/webrtc/modules/audio_processing/aec3/vector_math.h b/webrtc/modules/audio_processing/aec3/vector_math.h
new file mode 100644
index 0000000000000000000000000000000000000000..44bde257e2bca7151d368b0f5ce3ab9c91858f03
--- /dev/null
+++ b/webrtc/modules/audio_processing/aec3/vector_math.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
+
+#include "webrtc/typedefs.h"
aleloi 2017/04/11 14:27:30 Project headers should be after system headers.
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <math.h>
+#include <algorithm>
+#include <array>
+#include <functional>
+
+#include "webrtc/base/array_view.h"
+#include "webrtc/base/checks.h"
+#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Elementwise math on float vectors. The code path is chosen once, at
+// construction, from the supplied Aec3Optimization: the SSE2 path is compiled
+// in only when WEBRTC_ARCH_X86_FAMILY is defined, and every other value falls
+// through to the standard-library implementation in the `default:` branch.
+class VectorMath {
+ public:
+  explicit VectorMath(Aec3Optimization optimization)
+      : optimization_(optimization) {}
+
+  // Elementwise square root, computed in place: x[i] = sqrtf(x[i]).
+  void Sqrt(rtc::ArrayView<float> x) {
+    switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        // Number of leading elements that fill whole 4-float SSE registers;
+        // the (at most 3) elements after that are handled by the scalar loop.
+        const size_t size = x.size();
+        const size_t vector_end = size - size % 4;
aleloi 2017/04/11 14:27:30 I think automatic variables are usually named with
+
+        // A single size_t index is shared by both loops so that the scalar
+        // loop resumes exactly where the vector loop stopped, without mixing
+        // signed and unsigned counters.
+        size_t j = 0;
+        for (; j < vector_end; j += 4) {
ivoc 2017/04/11 12:03:01 I think the 2 different loop variables are a bit c
peah-webrtc 2017/04/11 13:09:26 Awesome! Thanks! Done.
+          // Unaligned load/store, so x needs no particular alignment.
+          __m128 g = _mm_loadu_ps(&x[j]);
+          g = _mm_sqrt_ps(g);
+          _mm_storeu_ps(&x[j], g);
+        }
+
+        for (; j < size; ++j) {
+          x[j] = sqrtf(x[j]);
+        }
+      } break;
+#endif
+      default:
+        std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); });
+    }
+  }
+
+  // Elementwise vector multiplication z = x * y.
+  // x, y and z are expected to have the same size (checked with
+  // RTC_DCHECK_EQ); the number of elements processed is x.size().
+  void Multiply(rtc::ArrayView<const float> x,
+                rtc::ArrayView<const float> y,
+                rtc::ArrayView<float> z) {
+    RTC_DCHECK_EQ(z.size(), x.size());
+    RTC_DCHECK_EQ(z.size(), y.size());
+    switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        // Leading part handled 4 floats at a time; the tail is scalar.
+        const size_t size = x.size();
+        const size_t vector_end = size - size % 4;
+
+        size_t j = 0;
+        for (; j < vector_end; j += 4) {
ivoc 2017/04/11 12:03:01 Same here.
peah-webrtc 2017/04/11 13:09:26 Done.
+          const __m128 x_j = _mm_loadu_ps(&x[j]);
+          const __m128 y_j = _mm_loadu_ps(&y[j]);
+          const __m128 z_j = _mm_mul_ps(x_j, y_j);
+          _mm_storeu_ps(&z[j], z_j);
+        }
+
+        for (; j < size; ++j) {
+          z[j] = x[j] * y[j];
+        }
+      } break;
+#endif
+      default:
+        std::transform(x.begin(), x.end(), y.begin(), z.begin(),
+                       std::multiplies<float>());
aleloi 2017/04/11 14:27:30 Are we sure std::transform doesn't do this kind of
+    }
+  }
+
+  // Elementwise vector accumulation z += x.
+  // x and z are expected to have the same size (checked with RTC_DCHECK_EQ);
+  // the number of elements processed is x.size().
+  void Accumulate(rtc::ArrayView<const float> x, rtc::ArrayView<float> z) {
+    RTC_DCHECK_EQ(z.size(), x.size());
+    switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        // Leading part handled 4 floats at a time; the tail is scalar.
+        const size_t size = x.size();
+        const size_t vector_end = size - size % 4;
+
+        size_t j = 0;
+        for (; j < vector_end; j += 4) {
ivoc 2017/04/11 12:03:01 And here.
peah-webrtc 2017/04/11 13:09:26 Done.
+          const __m128 x_j = _mm_loadu_ps(&x[j]);
+          __m128 z_j = _mm_loadu_ps(&z[j]);
+          z_j = _mm_add_ps(x_j, z_j);
+          _mm_storeu_ps(&z[j], z_j);
+        }
+
+        for (; j < size; ++j) {
+          z[j] += x[j];
+        }
+      } break;
+#endif
+      default:
+        std::transform(x.begin(), x.end(), z.begin(), z.begin(),
+                       std::plus<float>());
+    }
+  }
+
+ private:
+  // Selects which implementation the methods above dispatch to.
+  Aec3Optimization optimization_;
+};
+
+} // namespace aec3
+
+} // namespace webrtc
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_

Powered by Google App Engine
This is Rietveld 408576698