| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 56 matching lines...) | (...skipping 56 matching lines...) |
| 67 aec->wfBuf[0][pos + j], | 67 aec->wfBuf[0][pos + j], |
| 68 aec->wfBuf[1][pos + j]); | 68 aec->wfBuf[1][pos + j]); |
| 69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], | 69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], |
| 70 aec->xfBuf[1][xPos + j], | 70 aec->xfBuf[1][xPos + j], |
| 71 aec->wfBuf[0][pos + j], | 71 aec->wfBuf[0][pos + j], |
| 72 aec->wfBuf[1][pos + j]); | 72 aec->wfBuf[1][pos + j]); |
| 73 } | 73 } |
| 74 } | 74 } |
| 75 } | 75 } |
| 76 | 76 |
| 77 static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { | 77 static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
| 78 float normal_mu, |
| 79 float normal_error_threshold, |
| 80 float *x_pow, |
| 81 float ef[2][PART_LEN1]) { |
| 78 const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 82 const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
| 79 const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) | 83 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) |
| 80 : _mm_set1_ps(aec->normal_mu); | 84 : _mm_set1_ps(normal_mu); |
| 81 const __m128 kThresh = aec->extended_filter_enabled | 85 const __m128 kThresh = extended_filter_enabled |
| 82 ? _mm_set1_ps(kExtendedErrorThreshold) | 86 ? _mm_set1_ps(kExtendedErrorThreshold) |
| 83 : _mm_set1_ps(aec->normal_error_threshold); | 87 : _mm_set1_ps(normal_error_threshold); |
| 84 | 88 |
| 85 int i; | 89 int i; |
| 86 // vectorized code (four at once) | 90 // vectorized code (four at once) |
| 87 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 91 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 88 const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); | 92 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); |
| 89 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); | 93 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); |
| 90 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); | 94 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); |
| 91 | 95 |
| 92 const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); | 96 const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); |
| 93 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); | 97 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); |
| 94 __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); | 98 __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); |
| 95 const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); | 99 const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); |
| 96 const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); | 100 const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); |
| 97 const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); | 101 const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); |
| 98 const __m128 absEf = _mm_sqrt_ps(ef_sum2); | 102 const __m128 absEf = _mm_sqrt_ps(ef_sum2); |
| 99 const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); | 103 const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); |
| 100 __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); | 104 __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); |
| 101 const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); | 105 const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); |
| 102 __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); | 106 __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); |
| 103 __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); | 107 __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); |
| 104 ef_re_if = _mm_and_ps(bigger, ef_re_if); | 108 ef_re_if = _mm_and_ps(bigger, ef_re_if); |
| 105 ef_im_if = _mm_and_ps(bigger, ef_im_if); | 109 ef_im_if = _mm_and_ps(bigger, ef_im_if); |
| 106 ef_re = _mm_andnot_ps(bigger, ef_re); | 110 ef_re = _mm_andnot_ps(bigger, ef_re); |
| 107 ef_im = _mm_andnot_ps(bigger, ef_im); | 111 ef_im = _mm_andnot_ps(bigger, ef_im); |
| 108 ef_re = _mm_or_ps(ef_re, ef_re_if); | 112 ef_re = _mm_or_ps(ef_re, ef_re_if); |
| 109 ef_im = _mm_or_ps(ef_im, ef_im_if); | 113 ef_im = _mm_or_ps(ef_im, ef_im_if); |
| 110 ef_re = _mm_mul_ps(ef_re, kMu); | 114 ef_re = _mm_mul_ps(ef_re, kMu); |
| 111 ef_im = _mm_mul_ps(ef_im, kMu); | 115 ef_im = _mm_mul_ps(ef_im, kMu); |
| 112 | 116 |
| 113 _mm_storeu_ps(&ef[0][i], ef_re); | 117 _mm_storeu_ps(&ef[0][i], ef_re); |
| 114 _mm_storeu_ps(&ef[1][i], ef_im); | 118 _mm_storeu_ps(&ef[1][i], ef_im); |
| 115 } | 119 } |
| 116 // scalar code for the remaining items. | 120 // scalar code for the remaining items. |
| 117 { | 121 { |
| 118 const float mu = | 122 const float mu = |
| 119 aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; | 123 extended_filter_enabled ? kExtendedMu : normal_mu; |
| 120 const float error_threshold = aec->extended_filter_enabled | 124 const float error_threshold = extended_filter_enabled |
| 121 ? kExtendedErrorThreshold | 125 ? kExtendedErrorThreshold |
| 122 : aec->normal_error_threshold; | 126 : normal_error_threshold; |
| 123 for (; i < (PART_LEN1); i++) { | 127 for (; i < (PART_LEN1); i++) { |
| 124 float abs_ef; | 128 float abs_ef; |
| 125 ef[0][i] /= (aec->xPow[i] + 1e-10f); | 129 ef[0][i] /= (x_pow[i] + 1e-10f); |
| 126 ef[1][i] /= (aec->xPow[i] + 1e-10f); | 130 ef[1][i] /= (x_pow[i] + 1e-10f); |
| 127 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 131 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); |
| 128 | 132 |
| 129 if (abs_ef > error_threshold) { | 133 if (abs_ef > error_threshold) { |
| 130 abs_ef = error_threshold / (abs_ef + 1e-10f); | 134 abs_ef = error_threshold / (abs_ef + 1e-10f); |
| 131 ef[0][i] *= abs_ef; | 135 ef[0][i] *= abs_ef; |
| 132 ef[1][i] *= abs_ef; | 136 ef[1][i] *= abs_ef; |
| 133 } | 137 } |
| 134 | 138 |
| 135 // Stepsize factor | 139 // Stepsize factor |
| 136 ef[0][i] *= mu; | 140 ef[0][i] *= mu; |
| (...skipping 585 matching lines...) | (...skipping 585 matching lines...) |
| 722 } | 726 } |
| 723 } | 727 } |
| 724 | 728 |
| 725 void WebRtcAec_InitAec_SSE2(void) { /* Assigns each *SSE2 routine to the matching WebRtcAec_* hook (presumably function pointers declared elsewhere - confirm). Takes no arguments and returns nothing. */ | 729 void WebRtcAec_InitAec_SSE2(void) { /* Assigns each *SSE2 routine to the matching WebRtcAec_* hook (presumably function pointers declared elsewhere - confirm). Takes no arguments and returns nothing. */ |
| 726 WebRtcAec_FilterFar = FilterFarSSE2; | 730 WebRtcAec_FilterFar = FilterFarSSE2; |
| 727 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; /* NOTE(review): the NEW revision changes this routine's parameter list; the hook's declared type must match - verify in the header. */ | 731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; /* NOTE(review): the NEW revision changes this routine's parameter list; the hook's declared type must match - verify in the header. */ |
| 728 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 732 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
| 729 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
| 730 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 734 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
| 731 } | 735 } |
| OLD | NEW |