| Index: webrtc/modules/audio_processing/aec/aec_core_sse2.cc
|
| diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
|
| index 9a64616b0d94f1f25e548fad1c5eff26d50a3a8a..91d98b9773a1ad8f33d8cd1efde4f9a54252d6b4 100644
|
| --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
|
| +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
|
| @@ -495,16 +495,19 @@ static int PartitionDelaySSE2(const AecCore* aec) {
|
| //
|
| // In addition to updating the PSDs, also the filter diverge state is determined
|
| // upon actions are taken.
|
| -static void SmoothedPSD(AecCore* aec,
|
| +static void SmoothedPSD(int mult,
|
| + bool extended_filter_enabled,
|
| float efw[2][PART_LEN1],
|
| float dfw[2][PART_LEN1],
|
| float xfw[2][PART_LEN1],
|
| + CoherenceState* coherence_state,
|
| + short* filter_divergence_state,
|
| int* extreme_filter_divergence) {
|
| // Power estimate smoothing coefficients.
|
| const float* ptrGCoh =
|
| - aec->extended_filter_enabled
|
| - ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
| - : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
| + extended_filter_enabled
|
| + ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
|
| + : WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
|
| int i;
|
| float sdSum = 0, seSum = 0;
|
| const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
|
| @@ -520,9 +523,12 @@ static void SmoothedPSD(AecCore* aec,
|
| const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
|
| const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
|
| const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
|
| - __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
|
| - __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
|
| - __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
|
| + __m128 vec_sd =
|
| + _mm_mul_ps(_mm_loadu_ps(&coherence_state->sd[i]), vec_GCoh0);
|
| + __m128 vec_se =
|
| + _mm_mul_ps(_mm_loadu_ps(&coherence_state->se[i]), vec_GCoh0);
|
| + __m128 vec_sx =
|
| + _mm_mul_ps(_mm_loadu_ps(&coherence_state->sx[i]), vec_GCoh0);
|
| __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
|
| __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
|
| __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
|
| @@ -533,13 +539,13 @@ static void SmoothedPSD(AecCore* aec,
|
| vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
|
| vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
|
| vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
|
| - _mm_storeu_ps(&aec->sd[i], vec_sd);
|
| - _mm_storeu_ps(&aec->se[i], vec_se);
|
| - _mm_storeu_ps(&aec->sx[i], vec_sx);
|
| + _mm_storeu_ps(&coherence_state->sd[i], vec_sd);
|
| + _mm_storeu_ps(&coherence_state->se[i], vec_se);
|
| + _mm_storeu_ps(&coherence_state->sx[i], vec_sx);
|
|
|
| {
|
| - const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
| - const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
| + const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]);
|
| + const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]);
|
| __m128 vec_a =
|
| _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
| __m128 vec_b =
|
| @@ -554,13 +560,14 @@ static void SmoothedPSD(AecCore* aec,
|
| _mm_sub_ps(vec_dfwefw0110, _mm_mul_ps(vec_dfw1, vec_efw0));
|
| vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
|
| vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
|
| - _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
| - _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
| + _mm_storeu_ps(&coherence_state->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
| + _mm_storeu_ps(&coherence_state->sde[i + 2][0],
|
| + _mm_unpackhi_ps(vec_a, vec_b));
|
| }
|
|
|
| {
|
| - const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
| - const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
| + const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]);
|
| + const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]);
|
| __m128 vec_a =
|
| _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
| __m128 vec_b =
|
| @@ -575,8 +582,9 @@ static void SmoothedPSD(AecCore* aec,
|
| _mm_sub_ps(vec_dfwxfw0110, _mm_mul_ps(vec_dfw1, vec_xfw0));
|
| vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
|
| vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
|
| - _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
| - _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
| + _mm_storeu_ps(&coherence_state->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
| + _mm_storeu_ps(&coherence_state->sxd[i + 2][0],
|
| + _mm_unpackhi_ps(vec_a, vec_b));
|
| }
|
|
|
| vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
|
| @@ -587,39 +595,43 @@ static void SmoothedPSD(AecCore* aec,
|
| _mm_add_ps_4x1(vec_seSum, &seSum);
|
|
|
| for (; i < PART_LEN1; i++) {
|
| - aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
| - ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
| - aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
| - ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
| + coherence_state->sd[i] =
|
| + ptrGCoh[0] * coherence_state->sd[i] +
|
| + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
| + coherence_state->se[i] =
|
| + ptrGCoh[0] * coherence_state->se[i] +
|
| + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
| // We threshold here to protect against the ill-effects of a zero farend.
|
| // The threshold is not arbitrarily chosen, but balances protection and
|
| // adverse interaction with the algorithm's tuning.
|
| // TODO(bjornv): investigate further why this is so sensitive.
|
| - aec->sx[i] = ptrGCoh[0] * aec->sx[i] +
|
| - ptrGCoh[1] * WEBRTC_SPL_MAX(
|
| - xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
| - WebRtcAec_kMinFarendPSD);
|
| -
|
| - aec->sde[i][0] =
|
| - ptrGCoh[0] * aec->sde[i][0] +
|
| + coherence_state->sx[i] =
|
| + ptrGCoh[0] * coherence_state->sx[i] +
|
| + ptrGCoh[1] *
|
| + WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
| + WebRtcAec_kMinFarendPSD);
|
| +
|
| + coherence_state->sde[i][0] =
|
| + ptrGCoh[0] * coherence_state->sde[i][0] +
|
| ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
| - aec->sde[i][1] =
|
| - ptrGCoh[0] * aec->sde[i][1] +
|
| + coherence_state->sde[i][1] =
|
| + ptrGCoh[0] * coherence_state->sde[i][1] +
|
| ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
|
|
| - aec->sxd[i][0] =
|
| - ptrGCoh[0] * aec->sxd[i][0] +
|
| + coherence_state->sxd[i][0] =
|
| + ptrGCoh[0] * coherence_state->sxd[i][0] +
|
| ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
| - aec->sxd[i][1] =
|
| - ptrGCoh[0] * aec->sxd[i][1] +
|
| + coherence_state->sxd[i][1] =
|
| + ptrGCoh[0] * coherence_state->sxd[i][1] +
|
| ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
|
|
| - sdSum += aec->sd[i];
|
| - seSum += aec->se[i];
|
| + sdSum += coherence_state->sd[i];
|
| + seSum += coherence_state->se[i];
|
| }
|
|
|
| // Divergent filter safeguard update.
|
| - aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
| + *filter_divergence_state =
|
| + (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;
|
|
|
| // Signal extreme filter divergence if the error is significantly larger
|
| // than the nearend (13 dB).
|
| @@ -666,34 +678,38 @@ static void StoreAsComplexSSE2(const float* data,
|
| data_complex[0][PART_LEN] = data[1];
|
| }
|
|
|
| -static void SubbandCoherenceSSE2(AecCore* aec,
|
| +static void SubbandCoherenceSSE2(int mult,
|
| + bool extended_filter_enabled,
|
| float efw[2][PART_LEN1],
|
| float dfw[2][PART_LEN1],
|
| float xfw[2][PART_LEN1],
|
| float* fft,
|
| float* cohde,
|
| float* cohxd,
|
| + CoherenceState* coherence_state,
|
| + short* filter_divergence_state,
|
| int* extreme_filter_divergence) {
|
| int i;
|
|
|
| - SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
|
| + SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
|
| + filter_divergence_state, extreme_filter_divergence);
|
|
|
| {
|
| const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
|
|
|
| // Subband coherence
|
| for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
| - const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
|
| - const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
|
| - const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
|
| + const __m128 vec_sd = _mm_loadu_ps(&coherence_state->sd[i]);
|
| + const __m128 vec_se = _mm_loadu_ps(&coherence_state->se[i]);
|
| + const __m128 vec_sx = _mm_loadu_ps(&coherence_state->sx[i]);
|
| const __m128 vec_sdse =
|
| _mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_se));
|
| const __m128 vec_sdsx =
|
| _mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_sx));
|
| - const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
| - const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
| - const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
| - const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
| + const __m128 vec_sde_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]);
|
| + const __m128 vec_sde_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]);
|
| + const __m128 vec_sxd_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]);
|
| + const __m128 vec_sxd_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]);
|
| const __m128 vec_sde_0 =
|
| _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
| const __m128 vec_sde_1 =
|
| @@ -714,12 +730,12 @@ static void SubbandCoherenceSSE2(AecCore* aec,
|
|
|
| // scalar code for the remaining items.
|
| for (; i < PART_LEN1; i++) {
|
| - cohde[i] =
|
| - (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
| - (aec->sd[i] * aec->se[i] + 1e-10f);
|
| - cohxd[i] =
|
| - (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
| - (aec->sx[i] * aec->sd[i] + 1e-10f);
|
| + cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
|
| + coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
|
| + (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
|
| + cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
|
| + coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
|
| + (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
|
| }
|
| }
|
| }
|
|
|