| Index: webrtc/modules/audio_processing/aec/aec_core_neon.cc
|
| diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
|
| index 5ff81496d5571bcb13db9e400280745225df5262..fda5ad8b2fe6733fe9e01ee12818ca2700edca6a 100644
|
| --- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc
|
| +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
|
| @@ -374,14 +374,12 @@ static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
|
| return a_exp_b;
|
| }
|
|
|
| -static void OverdriveAndSuppressNEON(float overdrive_scaling,
|
| - float hNl[PART_LEN1],
|
| - const float hNlFb,
|
| - float efw[2][PART_LEN1]) {
|
| +static void OverdriveNEON(float overdrive_scaling,
|
| + float hNlFb,
|
| + float hNl[PART_LEN1]) {
|
| int i;
|
| const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
|
| const float32x4_t vec_one = vdupq_n_f32(1.0f);
|
| - const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
|
| const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);
|
|
|
| // vectorized code (four at once)
|
| @@ -404,28 +402,12 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,
|
|
|
| vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
|
|
|
| - {
|
| - const float32x4_t vec_overDriveCurve =
|
| - vld1q_f32(&WebRtcAec_overDriveCurve[i]);
|
| - const float32x4_t vec_overDriveSm_overDriveCurve =
|
| - vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
|
| - vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
|
| - vst1q_f32(&hNl[i], vec_hNl);
|
| - }
|
| -
|
| - // Suppress error signal
|
| - {
|
| - float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
|
| - float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
|
| - vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
|
| - vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
|
| -
|
| - // Ooura fft returns incorrect sign on imaginary component. It matters
|
| - // here because we are making an additive change with comfort noise.
|
| - vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
|
| - vst1q_f32(&efw[0][i], vec_efw_re);
|
| - vst1q_f32(&efw[1][i], vec_efw_im);
|
| - }
|
| + const float32x4_t vec_overDriveCurve =
|
| + vld1q_f32(&WebRtcAec_overDriveCurve[i]);
|
| + const float32x4_t vec_overDriveSm_overDriveCurve =
|
| + vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
|
| + vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
|
| + vst1q_f32(&hNl[i], vec_hNl);
|
| }
|
|
|
| // scalar code for the remaining items.
|
| @@ -437,8 +419,29 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,
|
| }
|
|
|
| hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
|
| + }
|
| +}
|
| +
|
| +static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
|
| + int i;
|
| + const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
|
| + // vectorized code (four at once)
|
| + for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
| + float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
|
| + float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
|
| + float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
|
| + vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
|
| + vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
|
| +
|
| + // The Ooura FFT returns the wrong sign on the imaginary part. It matters
|
| + // here because we are making an additive change with comfort noise.
|
| + vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
|
| + vst1q_f32(&efw[0][i], vec_efw_re);
|
| + vst1q_f32(&efw[1][i], vec_efw_im);
|
| + }
|
|
|
| - // Suppress error signal
|
| + // scalar code for the remaining items.
|
| + for (; i < PART_LEN1; i++) {
|
| efw[0][i] *= hNl[i];
|
| efw[1][i] *= hNl[i];
|
|
|
| @@ -722,7 +725,8 @@ void WebRtcAec_InitAec_neon(void) {
|
| WebRtcAec_FilterFar = FilterFarNEON;
|
| WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
|
| WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
|
| - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
|
| + WebRtcAec_Overdrive = OverdriveNEON;
|
| + WebRtcAec_Suppress = SuppressNEON;
|
| WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;
|
| WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;
|
| WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
|
|
|