| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 74 // scalar code for the remaining items. | 74 // scalar code for the remaining items. |
| 75 for (; j < PART_LEN1; j++) { | 75 for (; j < PART_LEN1; j++) { |
| 76 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 76 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], |
| 77 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 77 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); |
| 78 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 78 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], |
| 79 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 79 h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); |
| 80 } | 80 } |
| 81 } | 81 } |
| 82 } | 82 } |
| 83 | 83 |
| 84 static void ScaleErrorSignalSSE2(int extended_filter_enabled, | 84 static void ScaleErrorSignalSSE2(float mu, |
| 85 float normal_mu, | 85 float error_threshold, |
| 86 float normal_error_threshold, | |
| 87 float x_pow[PART_LEN1], | 86 float x_pow[PART_LEN1], |
| 88 float ef[2][PART_LEN1]) { | 87 float ef[2][PART_LEN1]) { |
| 89 const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 88 const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
| 90 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) | 89 const __m128 kMu = _mm_set1_ps(mu); |
| 91 : _mm_set1_ps(normal_mu); | 90 const __m128 kThresh = _mm_set1_ps(error_threshold); |
| 92 const __m128 kThresh = extended_filter_enabled | |
| 93 ? _mm_set1_ps(kExtendedErrorThreshold) | |
| 94 : _mm_set1_ps(normal_error_threshold); | |
| 95 | 91 |
| 96 int i; | 92 int i; |
| 97 // vectorized code (four at once) | 93 // vectorized code (four at once) |
| 98 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 94 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 99 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); | 95 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); |
| 100 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); | 96 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); |
| 101 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); | 97 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); |
| 102 | 98 |
| 103 const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); | 99 const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); |
| 104 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); | 100 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 119 ef_re = _mm_or_ps(ef_re, ef_re_if); | 115 ef_re = _mm_or_ps(ef_re, ef_re_if); |
| 120 ef_im = _mm_or_ps(ef_im, ef_im_if); | 116 ef_im = _mm_or_ps(ef_im, ef_im_if); |
| 121 ef_re = _mm_mul_ps(ef_re, kMu); | 117 ef_re = _mm_mul_ps(ef_re, kMu); |
| 122 ef_im = _mm_mul_ps(ef_im, kMu); | 118 ef_im = _mm_mul_ps(ef_im, kMu); |
| 123 | 119 |
| 124 _mm_storeu_ps(&ef[0][i], ef_re); | 120 _mm_storeu_ps(&ef[0][i], ef_re); |
| 125 _mm_storeu_ps(&ef[1][i], ef_im); | 121 _mm_storeu_ps(&ef[1][i], ef_im); |
| 126 } | 122 } |
| 127 // scalar code for the remaining items. | 123 // scalar code for the remaining items. |
| 128 { | 124 { |
| 129 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; | |
| 130 const float error_threshold = extended_filter_enabled | |
| 131 ? kExtendedErrorThreshold | |
| 132 : normal_error_threshold; | |
| 133 for (; i < (PART_LEN1); i++) { | 125 for (; i < (PART_LEN1); i++) { |
| 134 float abs_ef; | 126 float abs_ef; |
| 135 ef[0][i] /= (x_pow[i] + 1e-10f); | 127 ef[0][i] /= (x_pow[i] + 1e-10f); |
| 136 ef[1][i] /= (x_pow[i] + 1e-10f); | 128 ef[1][i] /= (x_pow[i] + 1e-10f); |
| 137 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 129 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); |
| 138 | 130 |
| 139 if (abs_ef > error_threshold) { | 131 if (abs_ef > error_threshold) { |
| 140 abs_ef = error_threshold / (abs_ef + 1e-10f); | 132 abs_ef = error_threshold / (abs_ef + 1e-10f); |
| 141 ef[0][i] *= abs_ef; | 133 ef[0][i] *= abs_ef; |
| 142 ef[1][i] *= abs_ef; | 134 ef[1][i] *= abs_ef; |
| (...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 738 WebRtcAec_FilterFar = FilterFarSSE2; | 730 WebRtcAec_FilterFar = FilterFarSSE2; |
| 739 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
| 740 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 732 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
| 741 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
| 742 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 734 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
| 743 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 735 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |
| 744 WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 736 WebRtcAec_PartitionDelay = PartitionDelaySSE2; |
| 745 WebRtcAec_WindowData = WindowDataSSE2; | 737 WebRtcAec_WindowData = WindowDataSSE2; |
| 746 } | 738 } |
| 747 } // namespace webrtc | 739 } // namespace webrtc |
| OLD | NEW |