| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS.  All contributing project authors may | 7  *  in the file PATENTS.  All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 72     // scalar code for the remaining items. | 72     // scalar code for the remaining items. | 
| 73     for (; j < PART_LEN1; j++) { | 73     for (; j < PART_LEN1; j++) { | 
| 74       y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 74       y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 
| 75                            h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 75                            h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 
| 76       y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 76       y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], | 
| 77                            h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 77                            h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); | 
| 78     } | 78     } | 
| 79   } | 79   } | 
| 80 } | 80 } | 
| 81 | 81 | 
| 82 static void ScaleErrorSignalSSE2(int extended_filter_enabled, | 82 static void ScaleErrorSignalSSE2(float mu, | 
| 83                                  float normal_mu, | 83                                  float error_threshold, | 
| 84                                  float normal_error_threshold, |  | 
| 85                                  float x_pow[PART_LEN1], | 84                                  float x_pow[PART_LEN1], | 
| 86                                  float ef[2][PART_LEN1]) { | 85                                  float ef[2][PART_LEN1]) { | 
| 87   const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 86   const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 
| 88   const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) | 87   const __m128 kMu = _mm_set1_ps(mu); | 
| 89                                              : _mm_set1_ps(normal_mu); | 88   const __m128 kThresh = _mm_set1_ps(error_threshold); | 
| 90   const __m128 kThresh = extended_filter_enabled |  | 
| 91                              ? _mm_set1_ps(kExtendedErrorThreshold) |  | 
| 92                              : _mm_set1_ps(normal_error_threshold); |  | 
| 93 | 89 | 
| 94   int i; | 90   int i; | 
| 95   // vectorized code (four at once) | 91   // vectorized code (four at once) | 
| 96   for (i = 0; i + 3 < PART_LEN1; i += 4) { | 92   for (i = 0; i + 3 < PART_LEN1; i += 4) { | 
| 97     const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); | 93     const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); | 
| 98     const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); | 94     const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); | 
| 99     const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); | 95     const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); | 
| 100 | 96 | 
| 101     const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); | 97     const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); | 
| 102     __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); | 98     __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); | 
| (...skipping 14 matching lines...) Expand all  Loading... | 
| 117     ef_re = _mm_or_ps(ef_re, ef_re_if); | 113     ef_re = _mm_or_ps(ef_re, ef_re_if); | 
| 118     ef_im = _mm_or_ps(ef_im, ef_im_if); | 114     ef_im = _mm_or_ps(ef_im, ef_im_if); | 
| 119     ef_re = _mm_mul_ps(ef_re, kMu); | 115     ef_re = _mm_mul_ps(ef_re, kMu); | 
| 120     ef_im = _mm_mul_ps(ef_im, kMu); | 116     ef_im = _mm_mul_ps(ef_im, kMu); | 
| 121 | 117 | 
| 122     _mm_storeu_ps(&ef[0][i], ef_re); | 118     _mm_storeu_ps(&ef[0][i], ef_re); | 
| 123     _mm_storeu_ps(&ef[1][i], ef_im); | 119     _mm_storeu_ps(&ef[1][i], ef_im); | 
| 124   } | 120   } | 
| 125   // scalar code for the remaining items. | 121   // scalar code for the remaining items. | 
| 126   { | 122   { | 
| 127     const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; |  | 
| 128     const float error_threshold = extended_filter_enabled |  | 
| 129                                       ? kExtendedErrorThreshold |  | 
| 130                                       : normal_error_threshold; |  | 
| 131     for (; i < (PART_LEN1); i++) { | 123     for (; i < (PART_LEN1); i++) { | 
| 132       float abs_ef; | 124       float abs_ef; | 
| 133       ef[0][i] /= (x_pow[i] + 1e-10f); | 125       ef[0][i] /= (x_pow[i] + 1e-10f); | 
| 134       ef[1][i] /= (x_pow[i] + 1e-10f); | 126       ef[1][i] /= (x_pow[i] + 1e-10f); | 
| 135       abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 127       abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 
| 136 | 128 | 
| 137       if (abs_ef > error_threshold) { | 129       if (abs_ef > error_threshold) { | 
| 138         abs_ef = error_threshold / (abs_ef + 1e-10f); | 130         abs_ef = error_threshold / (abs_ef + 1e-10f); | 
| 139         ef[0][i] *= abs_ef; | 131         ef[0][i] *= abs_ef; | 
| 140         ef[1][i] *= abs_ef; | 132         ef[1][i] *= abs_ef; | 
| (...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 736   WebRtcAec_FilterFar = FilterFarSSE2; | 728   WebRtcAec_FilterFar = FilterFarSSE2; | 
| 737   WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 729   WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 
| 738   WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 730   WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 
| 739   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 731   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 
| 740   WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 732   WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 
| 741   WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 733   WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 
| 742   WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 734   WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 
| 743   WebRtcAec_WindowData = WindowDataSSE2; | 735   WebRtcAec_WindowData = WindowDataSSE2; | 
| 744 } | 736 } | 
| 745 }  // namespace webrtc | 737 }  // namespace webrtc | 
| OLD | NEW | 
|---|