| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 
| 3  * | 3  * | 
| 4  *  Use of this source code is governed by a BSD-style license | 4  *  Use of this source code is governed by a BSD-style license | 
| 5  *  that can be found in the LICENSE file in the root of the source | 5  *  that can be found in the LICENSE file in the root of the source | 
| 6  *  tree. An additional intellectual property rights grant can be found | 6  *  tree. An additional intellectual property rights grant can be found | 
| 7  *  in the file PATENTS.  All contributing project authors may | 7  *  in the file PATENTS.  All contributing project authors may | 
| 8  *  be found in the AUTHORS file in the root of the source tree. | 8  *  be found in the AUTHORS file in the root of the source tree. | 
| 9  */ | 9  */ | 
| 10 | 10 | 
| (...skipping 356 matching lines...) | (...skipping 356 matching lines...) |
| 367     exp2_y = vmlaq_f32(C1, y, exp2_y); | 367     exp2_y = vmlaq_f32(C1, y, exp2_y); | 
| 368     exp2_y = vmlaq_f32(C0, y, exp2_y); | 368     exp2_y = vmlaq_f32(C0, y, exp2_y); | 
| 369 | 369 | 
| 370     // Combine parts. | 370     // Combine parts. | 
| 371     a_exp_b = vmulq_f32(exp2_y, two_n); | 371     a_exp_b = vmulq_f32(exp2_y, two_n); | 
| 372   } | 372   } | 
| 373 | 373 | 
| 374   return a_exp_b; | 374   return a_exp_b; | 
| 375 } | 375 } | 
| 376 | 376 | 
| 377 static void OverdriveAndSuppressNEON(AecCore* aec, | 377 static void OverdriveAndSuppressNEON(float overdrive_scaling, | 
| 378                                      float hNl[PART_LEN1], | 378                                      float hNl[PART_LEN1], | 
| 379                                      const float hNlFb, | 379                                      const float hNlFb, | 
| 380                                      float efw[2][PART_LEN1]) { | 380                                      float efw[2][PART_LEN1]) { | 
| 381   int i; | 381   int i; | 
| 382   const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); | 382   const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); | 
| 383   const float32x4_t vec_one = vdupq_n_f32(1.0f); | 383   const float32x4_t vec_one = vdupq_n_f32(1.0f); | 
| 384   const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); | 384   const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); | 
| 385   const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm); | 385   const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); | 
| 386 | 386 | 
| 387   // vectorized code (four at once) | 387   // vectorized code (four at once) | 
| 388   for (i = 0; i + 3 < PART_LEN1; i += 4) { | 388   for (i = 0; i + 3 < PART_LEN1; i += 4) { | 
| 389     // Weight subbands | 389     // Weight subbands | 
| 390     float32x4_t vec_hNl = vld1q_f32(&hNl[i]); | 390     float32x4_t vec_hNl = vld1q_f32(&hNl[i]); | 
| 391     const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); | 391     const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); | 
| 392     const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); | 392     const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); | 
| 393     const float32x4_t vec_weightCurve_hNlFb = | 393     const float32x4_t vec_weightCurve_hNlFb = | 
| 394         vmulq_f32(vec_weightCurve, vec_hNlFb); | 394         vmulq_f32(vec_weightCurve, vec_hNlFb); | 
| 395     const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); | 395     const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); | 
| 396     const float32x4_t vec_one_weightCurve_hNl = | 396     const float32x4_t vec_one_weightCurve_hNl = | 
| 397         vmulq_f32(vec_one_weightCurve, vec_hNl); | 397         vmulq_f32(vec_one_weightCurve, vec_hNl); | 
| 398     const uint32x4_t vec_if0 = | 398     const uint32x4_t vec_if0 = | 
| 399         vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); | 399         vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); | 
| 400     const float32x4_t vec_one_weightCurve_add = | 400     const float32x4_t vec_one_weightCurve_add = | 
| 401         vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); | 401         vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); | 
| 402     const uint32x4_t vec_if1 = | 402     const uint32x4_t vec_if1 = | 
| 403         vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); | 403         vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); | 
| 404 | 404 | 
| 405     vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); | 405     vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); | 
| 406 | 406 | 
| 407     { | 407     { | 
| 408       const float32x4_t vec_overDriveCurve = | 408       const float32x4_t vec_overDriveCurve = | 
| 409           vld1q_f32(&WebRtcAec_overDriveCurve[i]); | 409           vld1q_f32(&WebRtcAec_overDriveCurve[i]); | 
| 410       const float32x4_t vec_overDriveSm_overDriveCurve = | 410       const float32x4_t vec_overDriveSm_overDriveCurve = | 
| 411           vmulq_f32(vec_overDriveSm, vec_overDriveCurve); | 411           vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); | 
| 412       vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); | 412       vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); | 
| 413       vst1q_f32(&hNl[i], vec_hNl); | 413       vst1q_f32(&hNl[i], vec_hNl); | 
| 414     } | 414     } | 
| 415 | 415 | 
| 416     // Suppress error signal | 416     // Suppress error signal | 
| 417     { | 417     { | 
| 418       float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); | 418       float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); | 
| 419       float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); | 419       float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); | 
| 420       vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); | 420       vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); | 
| 421       vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); | 421       vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); | 
| 422 | 422 | 
| 423       // Ooura fft returns incorrect sign on imaginary component. It matters | 423       // Ooura fft returns incorrect sign on imaginary component. It matters | 
| 424       // here because we are making an additive change with comfort noise. | 424       // here because we are making an additive change with comfort noise. | 
| 425       vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); | 425       vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); | 
| 426       vst1q_f32(&efw[0][i], vec_efw_re); | 426       vst1q_f32(&efw[0][i], vec_efw_re); | 
| 427       vst1q_f32(&efw[1][i], vec_efw_im); | 427       vst1q_f32(&efw[1][i], vec_efw_im); | 
| 428     } | 428     } | 
| 429   } | 429   } | 
| 430 | 430 | 
| 431   // scalar code for the remaining items. | 431   // scalar code for the remaining items. | 
| 432   for (; i < PART_LEN1; i++) { | 432   for (; i < PART_LEN1; i++) { | 
| 433     // Weight subbands | 433     // Weight subbands | 
| 434     if (hNl[i] > hNlFb) { | 434     if (hNl[i] > hNlFb) { | 
| 435       hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 435       hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 
| 436                (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 436                (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 
| 437     } | 437     } | 
| 438 | 438 | 
| 439     hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | 439     hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); | 
| 440 | 440 | 
| 441     // Suppress error signal | 441     // Suppress error signal | 
| 442     efw[0][i] *= hNl[i]; | 442     efw[0][i] *= hNl[i]; | 
| 443     efw[1][i] *= hNl[i]; | 443     efw[1][i] *= hNl[i]; | 
| 444 | 444 | 
| 445     // Ooura fft returns incorrect sign on imaginary component. It matters | 445     // Ooura fft returns incorrect sign on imaginary component. It matters | 
| 446     // here because we are making an additive change with comfort noise. | 446     // here because we are making an additive change with comfort noise. | 
| 447     efw[1][i] *= -1; | 447     efw[1][i] *= -1; | 
| 448   } | 448   } | 
| 449 } | 449 } | 
| (...skipping 267 matching lines...) | (...skipping 267 matching lines...) |
| 717   WebRtcAec_FilterFar = FilterFarNEON; | 717   WebRtcAec_FilterFar = FilterFarNEON; | 
| 718   WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 718   WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 
| 719   WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 719   WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 
| 720   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 720   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 
| 721   WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 721   WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 
| 722   WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 722   WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 
| 723   WebRtcAec_PartitionDelay = PartitionDelayNEON; | 723   WebRtcAec_PartitionDelay = PartitionDelayNEON; | 
| 724   WebRtcAec_WindowData = WindowDataNEON; | 724   WebRtcAec_WindowData = WindowDataNEON; | 
| 725 } | 725 } | 
| 726 }  // namespace webrtc | 726 }  // namespace webrtc | 
| OLD | NEW | 
|---|