OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/common_audio/fir_filter_sse.h" | 11 #include "webrtc/common_audio/fir_filter_sse.h" |
12 | 12 |
13 #include <assert.h> | |
14 #include <stdint.h> | 13 #include <stdint.h> |
15 #include <string.h> | 14 #include <string.h> |
16 #include <xmmintrin.h> | 15 #include <xmmintrin.h> |
17 | 16 |
| 17 #include "webrtc/base/checks.h" |
18 #include "webrtc/system_wrappers/include/aligned_malloc.h" | 18 #include "webrtc/system_wrappers/include/aligned_malloc.h" |
19 | 19 |
20 namespace webrtc { | 20 namespace webrtc { |
21 | 21 |
22 FIRFilterSSE2::FIRFilterSSE2(const float* coefficients, | 22 FIRFilterSSE2::FIRFilterSSE2(const float* coefficients, |
23 size_t coefficients_length, | 23 size_t coefficients_length, |
24 size_t max_input_length) | 24 size_t max_input_length) |
25 : // Closest higher multiple of four. | 25 : // Closest higher multiple of four. |
26 coefficients_length_((coefficients_length + 3) & ~0x03), | 26 coefficients_length_((coefficients_length + 3) & ~0x03), |
27 state_length_(coefficients_length_ - 1), | 27 state_length_(coefficients_length_ - 1), |
28 coefficients_(static_cast<float*>( | 28 coefficients_(static_cast<float*>( |
29 AlignedMalloc(sizeof(float) * coefficients_length_, 16))), | 29 AlignedMalloc(sizeof(float) * coefficients_length_, 16))), |
30 state_(static_cast<float*>( | 30 state_(static_cast<float*>( |
31 AlignedMalloc(sizeof(float) * (max_input_length + state_length_), | 31 AlignedMalloc(sizeof(float) * (max_input_length + state_length_), |
32 16))) { | 32 16))) { |
33 // Add zeros at the end of the coefficients. | 33 // Add zeros at the end of the coefficients. |
34 size_t padding = coefficients_length_ - coefficients_length; | 34 size_t padding = coefficients_length_ - coefficients_length; |
35 memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0])); | 35 memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0])); |
36 // The coefficients are reversed to compensate for the order in which the | 36 // The coefficients are reversed to compensate for the order in which the |
37 // input samples are acquired (most recent last). | 37 // input samples are acquired (most recent last). |
38 for (size_t i = 0; i < coefficients_length; ++i) { | 38 for (size_t i = 0; i < coefficients_length; ++i) { |
39 coefficients_[i + padding] = coefficients[coefficients_length - i - 1]; | 39 coefficients_[i + padding] = coefficients[coefficients_length - i - 1]; |
40 } | 40 } |
41 memset(state_.get(), | 41 memset(state_.get(), |
42 0, | 42 0, |
43 (max_input_length + state_length_) * sizeof(state_[0])); | 43 (max_input_length + state_length_) * sizeof(state_[0])); |
44 } | 44 } |
45 | 45 |
46 void FIRFilterSSE2::Filter(const float* in, size_t length, float* out) { | 46 void FIRFilterSSE2::Filter(const float* in, size_t length, float* out) { |
47 assert(length > 0); | 47 RTC_DCHECK_GT(length, 0); |
48 | 48 |
49 memcpy(&state_[state_length_], in, length * sizeof(*in)); | 49 memcpy(&state_[state_length_], in, length * sizeof(*in)); |
50 | 50 |
51 // Convolves the input signal |in| with the filter kernel |coefficients_| | 51 // Convolves the input signal |in| with the filter kernel |coefficients_| |
52 // taking into account the previous state. | 52 // taking into account the previous state. |
53 for (size_t i = 0; i < length; ++i) { | 53 for (size_t i = 0; i < length; ++i) { |
54 float* in_ptr = &state_[i]; | 54 float* in_ptr = &state_[i]; |
55 float* coef_ptr = coefficients_.get(); | 55 float* coef_ptr = coefficients_.get(); |
56 | 56 |
57 __m128 m_sum = _mm_setzero_ps(); | 57 __m128 m_sum = _mm_setzero_ps(); |
(...skipping 14 matching lines...) Expand all Loading... |
72 } | 72 } |
73 m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum); | 73 m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum); |
74 _mm_store_ss(out + i, _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1))); | 74 _mm_store_ss(out + i, _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1))); |
75 } | 75 } |
76 | 76 |
77 // Update current state. | 77 // Update current state. |
78 memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0])); | 78 memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0])); |
79 } | 79 } |
80 | 80 |
81 } // namespace webrtc | 81 } // namespace webrtc |
OLD | NEW |