OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
73 for (; j < PART_LEN1; j++) { | 73 for (; j < PART_LEN1; j++) { |
74 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 74 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], |
75 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 75 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); |
76 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 76 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], |
77 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 77 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); |
78 } | 78 } |
79 } | 79 } |
80 } | 80 } |
81 | 81 |
82 static void ScaleErrorSignalSSE2(int extended_filter_enabled, | 82 static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
83 float normal_mu, | 83 float mu, |
84 float normal_error_threshold, | 84 float normal_error_threshold, |
85 float x_pow[PART_LEN1], | 85 float x_pow[PART_LEN1], |
86 float ef[2][PART_LEN1]) { | 86 float ef[2][PART_LEN1]) { |
87 const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 87 const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
88 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) | 88 const __m128 kMu = _mm_set1_ps(mu); |
tlegrand-webrtc
2016/04/14 14:32:42
Same comments as for the main function.
peah-webrtc
2016/04/14 22:12:41
Done.
| |
89 : _mm_set1_ps(normal_mu); | |
90 const __m128 kThresh = extended_filter_enabled | 89 const __m128 kThresh = extended_filter_enabled |
91 ? _mm_set1_ps(kExtendedErrorThreshold) | 90 ? _mm_set1_ps(kExtendedErrorThreshold) |
92 : _mm_set1_ps(normal_error_threshold); | 91 : _mm_set1_ps(normal_error_threshold); |
93 | 92 |
94 int i; | 93 int i; |
95 // vectorized code (four at once) | 94 // vectorized code (four at once) |
96 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 95 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
97 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); | 96 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); |
98 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); | 97 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); |
99 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); | 98 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); |
(...skipping 17 matching lines...) Expand all Loading... | |
117 ef_re = _mm_or_ps(ef_re, ef_re_if); | 116 ef_re = _mm_or_ps(ef_re, ef_re_if); |
118 ef_im = _mm_or_ps(ef_im, ef_im_if); | 117 ef_im = _mm_or_ps(ef_im, ef_im_if); |
119 ef_re = _mm_mul_ps(ef_re, kMu); | 118 ef_re = _mm_mul_ps(ef_re, kMu); |
120 ef_im = _mm_mul_ps(ef_im, kMu); | 119 ef_im = _mm_mul_ps(ef_im, kMu); |
121 | 120 |
122 _mm_storeu_ps(&ef[0][i], ef_re); | 121 _mm_storeu_ps(&ef[0][i], ef_re); |
123 _mm_storeu_ps(&ef[1][i], ef_im); | 122 _mm_storeu_ps(&ef[1][i], ef_im); |
124 } | 123 } |
125 // scalar code for the remaining items. | 124 // scalar code for the remaining items. |
126 { | 125 { |
127 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; | |
128 const float error_threshold = extended_filter_enabled | 126 const float error_threshold = extended_filter_enabled |
129 ? kExtendedErrorThreshold | 127 ? kExtendedErrorThreshold |
130 : normal_error_threshold; | 128 : normal_error_threshold; |
131 for (; i < (PART_LEN1); i++) { | 129 for (; i < (PART_LEN1); i++) { |
132 float abs_ef; | 130 float abs_ef; |
133 ef[0][i] /= (x_pow[i] + 1e-10f); | 131 ef[0][i] /= (x_pow[i] + 1e-10f); |
134 ef[1][i] /= (x_pow[i] + 1e-10f); | 132 ef[1][i] /= (x_pow[i] + 1e-10f); |
135 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 133 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); |
136 | 134 |
137 if (abs_ef > error_threshold) { | 135 if (abs_ef > error_threshold) { |
(...skipping 598 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
736 WebRtcAec_FilterFar = FilterFarSSE2; | 734 WebRtcAec_FilterFar = FilterFarSSE2; |
737 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
738 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 736 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
739 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
740 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 738 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
741 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 739 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |
742 WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 740 WebRtcAec_PartitionDelay = PartitionDelaySSE2; |
743 WebRtcAec_WindowData = WindowDataSSE2; | 741 WebRtcAec_WindowData = WindowDataSSE2; |
744 } | 742 } |
745 } // namespace webrtc | 743 } // namespace webrtc |
OLD | NEW |