OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
367 exp2_y = vmlaq_f32(C1, y, exp2_y); | 367 exp2_y = vmlaq_f32(C1, y, exp2_y); |
368 exp2_y = vmlaq_f32(C0, y, exp2_y); | 368 exp2_y = vmlaq_f32(C0, y, exp2_y); |
369 | 369 |
370 // Combine parts. | 370 // Combine parts. |
371 a_exp_b = vmulq_f32(exp2_y, two_n); | 371 a_exp_b = vmulq_f32(exp2_y, two_n); |
372 } | 372 } |
373 | 373 |
374 return a_exp_b; | 374 return a_exp_b; |
375 } | 375 } |
376 | 376 |
377 static void OverdriveAndSuppressNEON(float overdrive_scaling, | 377 static void OverdriveNEON(float overdrive_scaling, |
378 float hNl[PART_LEN1], | 378 float hNlFb, |
379 const float hNlFb, | 379 float hNl[PART_LEN1]) { |
380 float efw[2][PART_LEN1]) { | |
381 int i; | 380 int i; |
382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); | 381 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); |
383 const float32x4_t vec_one = vdupq_n_f32(1.0f); | 382 const float32x4_t vec_one = vdupq_n_f32(1.0f); |
384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); | |
385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); | 383 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); |
386 | 384 |
387 // vectorized code (four at once) | 385 // vectorized code (four at once) |
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 386 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
389 // Weight subbands | 387 // Weight subbands |
390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); | 388 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); |
391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); | 389 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); |
392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); | 390 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); |
393 const float32x4_t vec_weightCurve_hNlFb = | 391 const float32x4_t vec_weightCurve_hNlFb = |
394 vmulq_f32(vec_weightCurve, vec_hNlFb); | 392 vmulq_f32(vec_weightCurve, vec_hNlFb); |
395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); | 393 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); |
396 const float32x4_t vec_one_weightCurve_hNl = | 394 const float32x4_t vec_one_weightCurve_hNl = |
397 vmulq_f32(vec_one_weightCurve, vec_hNl); | 395 vmulq_f32(vec_one_weightCurve, vec_hNl); |
398 const uint32x4_t vec_if0 = | 396 const uint32x4_t vec_if0 = |
399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); | 397 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); |
400 const float32x4_t vec_one_weightCurve_add = | 398 const float32x4_t vec_one_weightCurve_add = |
401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); | 399 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); |
402 const uint32x4_t vec_if1 = | 400 const uint32x4_t vec_if1 = |
403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); | 401 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); |
404 | 402 |
405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); | 403 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); |
406 | 404 |
407 { | 405 const float32x4_t vec_overDriveCurve = |
408 const float32x4_t vec_overDriveCurve = | 406 vld1q_f32(&WebRtcAec_overDriveCurve[i]); |
409 vld1q_f32(&WebRtcAec_overDriveCurve[i]); | 407 const float32x4_t vec_overDriveSm_overDriveCurve = |
410 const float32x4_t vec_overDriveSm_overDriveCurve = | 408 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); |
411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); | 409 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); |
412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); | 410 vst1q_f32(&hNl[i], vec_hNl); |
413 vst1q_f32(&hNl[i], vec_hNl); | |
414 } | |
415 | |
416 // Suppress error signal | |
417 { | |
418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); | |
419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); | |
420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); | |
421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); | |
422 | |
423 // Ooura fft returns incorrect sign on imaginary component. It matters | |
424 // here because we are making an additive change with comfort noise. | |
425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); | |
426 vst1q_f32(&efw[0][i], vec_efw_re); | |
427 vst1q_f32(&efw[1][i], vec_efw_im); | |
428 } | |
429 } | 411 } |
430 | 412 |
431 // scalar code for the remaining items. | 413 // scalar code for the remaining items. |
432 for (; i < PART_LEN1; i++) { | 414 for (; i < PART_LEN1; i++) { |
433 // Weight subbands | 415 // Weight subbands |
434 if (hNl[i] > hNlFb) { | 416 if (hNl[i] > hNlFb) { |
435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 417 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + |
436 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 418 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; |
437 } | 419 } |
438 | 420 |
439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); | 421 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); |
| 422 } |
| 423 } |
440 | 424 |
441 // Suppress error signal | 425 static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { |
| 426 int i; |
| 427 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); |
| 428 // vectorized code (four at once) |
| 429 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 430 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); |
| 431 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); |
| 432 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); |
| 433 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); |
| 434 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); |
| 435 |
| 436 // Ooura fft returns incorrect sign on imaginary component. It matters |
| 437 // here because we are making an additive change with comfort noise. |
| 438 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); |
| 439 vst1q_f32(&efw[0][i], vec_efw_re); |
| 440 vst1q_f32(&efw[1][i], vec_efw_im); |
| 441 } |
| 442 |
| 443 // scalar code for the remaining items. |
| 444 for (; i < PART_LEN1; i++) { |
442 efw[0][i] *= hNl[i]; | 445 efw[0][i] *= hNl[i]; |
443 efw[1][i] *= hNl[i]; | 446 efw[1][i] *= hNl[i]; |
444 | 447 |
445 // Ooura fft returns incorrect sign on imaginary component. It matters | 448 // Ooura fft returns incorrect sign on imaginary component. It matters |
446 // here because we are making an additive change with comfort noise. | 449 // here because we are making an additive change with comfort noise. |
447 efw[1][i] *= -1; | 450 efw[1][i] *= -1; |
448 } | 451 } |
449 } | 452 } |
450 | 453 |
451 static int PartitionDelayNEON( | 454 static int PartitionDelayNEON( |
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
715 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + | 718 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + |
716 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / | 719 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / |
717 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); | 720 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); |
718 } | 721 } |
719 } | 722 } |
720 | 723 |
721 void WebRtcAec_InitAec_neon(void) { | 724 void WebRtcAec_InitAec_neon(void) { |
722 WebRtcAec_FilterFar = FilterFarNEON; | 725 WebRtcAec_FilterFar = FilterFarNEON; |
723 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 726 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
724 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 727 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
725 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 728 WebRtcAec_Overdrive = OverdriveNEON; |
| 729 WebRtcAec_Suppress = SuppressNEON; |
726 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; | 730 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; |
727 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; | 731 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; |
728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 732 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
729 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 733 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
730 WebRtcAec_WindowData = WindowDataNEON; | 734 WebRtcAec_WindowData = WindowDataNEON; |
731 } | 735 } |
732 } // namespace webrtc | 736 } // namespace webrtc |
OLD | NEW |