Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(731)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1455163006: Ducking fix #1: Initial refactoring preparing for further AEC work (changes are bitexact). (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fixed problem with variable declared inside loop statement | Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
67 aec->wfBuf[0][pos + j], 67 aec->wfBuf[0][pos + j],
68 aec->wfBuf[1][pos + j]); 68 aec->wfBuf[1][pos + j]);
69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], 69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
70 aec->xfBuf[1][xPos + j], 70 aec->xfBuf[1][xPos + j],
71 aec->wfBuf[0][pos + j], 71 aec->wfBuf[0][pos + j],
72 aec->wfBuf[1][pos + j]); 72 aec->wfBuf[1][pos + j]);
73 } 73 }
74 } 74 }
75 } 75 }
76 76
77 static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { 77 static void ScaleErrorSignalSSE2(int extended_filter_enabled,
78 float normal_mu,
79 float normal_error_threshold,
80 float *x_pow,
81 float ef[2][PART_LEN1]) {
78 const __m128 k1e_10f = _mm_set1_ps(1e-10f); 82 const __m128 k1e_10f = _mm_set1_ps(1e-10f);
79 const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) 83 const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
80 : _mm_set1_ps(aec->normal_mu); 84 : _mm_set1_ps(normal_mu);
81 const __m128 kThresh = aec->extended_filter_enabled 85 const __m128 kThresh = extended_filter_enabled
82 ? _mm_set1_ps(kExtendedErrorThreshold) 86 ? _mm_set1_ps(kExtendedErrorThreshold)
83 : _mm_set1_ps(aec->normal_error_threshold); 87 : _mm_set1_ps(normal_error_threshold);
84 88
85 int i; 89 int i;
86 // vectorized code (four at once) 90 // vectorized code (four at once)
87 for (i = 0; i + 3 < PART_LEN1; i += 4) { 91 for (i = 0; i + 3 < PART_LEN1; i += 4) {
88 const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); 92 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]);
89 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); 93 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
90 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); 94 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
91 95
92 const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); 96 const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f);
93 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); 97 __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
94 __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); 98 __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
95 const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); 99 const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
96 const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); 100 const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
97 const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); 101 const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);
98 const __m128 absEf = _mm_sqrt_ps(ef_sum2); 102 const __m128 absEf = _mm_sqrt_ps(ef_sum2);
99 const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); 103 const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);
100 __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); 104 __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);
101 const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); 105 const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);
102 __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); 106 __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
103 __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); 107 __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);
104 ef_re_if = _mm_and_ps(bigger, ef_re_if); 108 ef_re_if = _mm_and_ps(bigger, ef_re_if);
105 ef_im_if = _mm_and_ps(bigger, ef_im_if); 109 ef_im_if = _mm_and_ps(bigger, ef_im_if);
106 ef_re = _mm_andnot_ps(bigger, ef_re); 110 ef_re = _mm_andnot_ps(bigger, ef_re);
107 ef_im = _mm_andnot_ps(bigger, ef_im); 111 ef_im = _mm_andnot_ps(bigger, ef_im);
108 ef_re = _mm_or_ps(ef_re, ef_re_if); 112 ef_re = _mm_or_ps(ef_re, ef_re_if);
109 ef_im = _mm_or_ps(ef_im, ef_im_if); 113 ef_im = _mm_or_ps(ef_im, ef_im_if);
110 ef_re = _mm_mul_ps(ef_re, kMu); 114 ef_re = _mm_mul_ps(ef_re, kMu);
111 ef_im = _mm_mul_ps(ef_im, kMu); 115 ef_im = _mm_mul_ps(ef_im, kMu);
112 116
113 _mm_storeu_ps(&ef[0][i], ef_re); 117 _mm_storeu_ps(&ef[0][i], ef_re);
114 _mm_storeu_ps(&ef[1][i], ef_im); 118 _mm_storeu_ps(&ef[1][i], ef_im);
115 } 119 }
116 // scalar code for the remaining items. 120 // scalar code for the remaining items.
117 { 121 {
118 const float mu = 122 const float mu =
119 aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; 123 extended_filter_enabled ? kExtendedMu : normal_mu;
120 const float error_threshold = aec->extended_filter_enabled 124 const float error_threshold = extended_filter_enabled
121 ? kExtendedErrorThreshold 125 ? kExtendedErrorThreshold
122 : aec->normal_error_threshold; 126 : normal_error_threshold;
123 for (; i < (PART_LEN1); i++) { 127 for (; i < (PART_LEN1); i++) {
124 float abs_ef; 128 float abs_ef;
125 ef[0][i] /= (aec->xPow[i] + 1e-10f); 129 ef[0][i] /= (x_pow[i] + 1e-10f);
126 ef[1][i] /= (aec->xPow[i] + 1e-10f); 130 ef[1][i] /= (x_pow[i] + 1e-10f);
127 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); 131 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
128 132
129 if (abs_ef > error_threshold) { 133 if (abs_ef > error_threshold) {
130 abs_ef = error_threshold / (abs_ef + 1e-10f); 134 abs_ef = error_threshold / (abs_ef + 1e-10f);
131 ef[0][i] *= abs_ef; 135 ef[0][i] *= abs_ef;
132 ef[1][i] *= abs_ef; 136 ef[1][i] *= abs_ef;
133 } 137 }
134 138
135 // Stepsize factor 139 // Stepsize factor
136 ef[0][i] *= mu; 140 ef[0][i] *= mu;
(...skipping 585 matching lines...) Expand 10 before | Expand all | Expand 10 after
722 } 726 }
723 } 727 }
724 728
725 void WebRtcAec_InitAec_SSE2(void) { 729 void WebRtcAec_InitAec_SSE2(void) {
726 WebRtcAec_FilterFar = FilterFarSSE2; 730 WebRtcAec_FilterFar = FilterFarSSE2;
727 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
728 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 732 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
729 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
730 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 734 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
731 } 735 }
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698