Index: webrtc/modules/audio_processing/aec/aec_core_sse2.cc |
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc |
index 47167eca636319f455445203b0167c44f0850cb1..fa6623bba1f586ee368035aee9d74724ac341d0c 100644 |
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc |
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc |
@@ -375,14 +375,12 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) { |
return a_exp_b; |
} |
-static void OverdriveAndSuppressSSE2(float overdrive_scaling, |
- float hNl[PART_LEN1], |
- const float hNlFb, |
- float efw[2][PART_LEN1]) { |
+static void OverdriveSSE2(float overdrive_scaling, |
+ float hNlFb, |
+ float hNl[PART_LEN1]) { |
int i; |
const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); |
const __m128 vec_one = _mm_set1_ps(1.0f); |
- const __m128 vec_minus_one = _mm_set1_ps(-1.0f); |
const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling); |
// vectorized code (four at once) |
for (i = 0; i + 3 < PART_LEN1; i += 4) { |
@@ -399,28 +397,12 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling, |
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); |
vec_hNl = _mm_or_ps(vec_if0, vec_if1); |
- { |
- const __m128 vec_overDriveCurve = |
- _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); |
- const __m128 vec_overDriveSm_overDriveCurve = |
- _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); |
- vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); |
- _mm_storeu_ps(&hNl[i], vec_hNl); |
- } |
- |
- // Suppress error signal |
- { |
- __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); |
- __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); |
- vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); |
- vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); |
- |
- // Ooura fft returns incorrect sign on imaginary component. It matters |
- // here because we are making an additive change with comfort noise. |
- vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); |
- _mm_storeu_ps(&efw[0][i], vec_efw_re); |
- _mm_storeu_ps(&efw[1][i], vec_efw_im); |
- } |
+ const __m128 vec_overDriveCurve = |
+ _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); |
+ const __m128 vec_overDriveSm_overDriveCurve = |
+ _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); |
+ vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); |
+ _mm_storeu_ps(&hNl[i], vec_hNl); |
} |
// scalar code for the remaining items. |
for (; i < PART_LEN1; i++) { |
@@ -430,7 +412,29 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling, |
(1 - WebRtcAec_weightCurve[i]) * hNl[i]; |
} |
hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); |
+ } |
+} |
+static void SuppressSSE2(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { |
+ int i; |
+ const __m128 vec_minus_one = _mm_set1_ps(-1.0f); |
+ // vectorized code (four at once) |
+ for (i = 0; i + 3 < PART_LEN1; i += 4) { |
+ // Suppress error signal |
+ __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); |
+ __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); |
+ __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); |
+ vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); |
+ vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); |
+ |
+ // Ooura fft returns incorrect sign on imaginary component. It matters |
+ // here because we are making an additive change with comfort noise. |
+ vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); |
+ _mm_storeu_ps(&efw[0][i], vec_efw_re); |
+ _mm_storeu_ps(&efw[1][i], vec_efw_im); |
+ } |
+ // scalar code for the remaining items. |
+ for (; i < PART_LEN1; i++) { |
// Suppress error signal |
efw[0][i] *= hNl[i]; |
efw[1][i] *= hNl[i]; |
@@ -735,7 +739,8 @@ void WebRtcAec_InitAec_SSE2(void) { |
WebRtcAec_FilterFar = FilterFarSSE2; |
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
- WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
+ WebRtcAec_Overdrive = OverdriveSSE2; |
+ WebRtcAec_Suppress = SuppressSSE2; |
WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; |
WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; |
WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |