OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
446 // Suppress error signal | 446 // Suppress error signal |
447 efw[0][i] *= hNl[i]; | 447 efw[0][i] *= hNl[i]; |
448 efw[1][i] *= hNl[i]; | 448 efw[1][i] *= hNl[i]; |
449 | 449 |
450 // Ooura fft returns incorrect sign on imaginary component. It matters | 450 // Ooura fft returns incorrect sign on imaginary component. It matters |
451 // here because we are making an additive change with comfort noise. | 451 // here because we are making an additive change with comfort noise. |
452 efw[1][i] *= -1; | 452 efw[1][i] *= -1; |
453 } | 453 } |
454 } | 454 } |
455 | 455 |
456 static int PartitionDelay(const AecCore* aec) { | 456 static int PartitionDelayNEON(const AecCore* aec) { |
457 // Measures the energy in each filter partition and returns the partition with | 457 // Measures the energy in each filter partition and returns the partition with |
458 // highest energy. | 458 // highest energy. |
459 // TODO(bjornv): Spread computational cost by computing one partition per | 459 // TODO(bjornv): Spread computational cost by computing one partition per |
460 // block? | 460 // block? |
461 float wfEnMax = 0; | 461 float wfEnMax = 0; |
462 int i; | 462 int i; |
463 int delay = 0; | 463 int delay = 0; |
464 | 464 |
465 for (i = 0; i < aec->num_partitions; i++) { | 465 for (i = 0; i < aec->num_partitions; i++) { |
466 int j; | 466 int j; |
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
631 | 631 |
632 if (aec->divergeState) | 632 if (aec->divergeState) |
633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); | 633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); |
634 | 634 |
635 // Reset if error is significantly larger than nearend (13 dB). | 635 // Reset if error is significantly larger than nearend (13 dB). |
636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) | 636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) |
637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); | 637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); |
638 } | 638 } |
639 | 639 |
640 // Window time domain data to be used by the fft. | 640 // Window time domain data to be used by the fft. |
641 __inline static void WindowData(float* x_windowed, const float* x) { | 641 static void WindowDataNEON(float* x_windowed, const float* x) { |
642 int i; | 642 int i; |
643 for (i = 0; i < PART_LEN; i += 4) { | 643 for (i = 0; i < PART_LEN; i += 4) { |
644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); | 644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); |
645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); | 645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); |
646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); | 646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); |
647 // A B C D | 647 // A B C D |
648 float32x4_t vec_sqrtHanning_rev = | 648 float32x4_t vec_sqrtHanning_rev = |
649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); | 649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); |
650 // B A D C | 650 // B A D C |
651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); | 651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); |
652 // D C B A | 652 // D C B A |
653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), | 653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), |
654 vget_low_f32(vec_sqrtHanning_rev)); | 654 vget_low_f32(vec_sqrtHanning_rev)); |
655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); | 655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); |
656 vst1q_f32(&x_windowed[PART_LEN + i], | 656 vst1q_f32(&x_windowed[PART_LEN + i], |
657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); | 657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); |
658 } | 658 } |
659 } | 659 } |
660 | 660 |
661 // Puts fft output data into a complex valued array. | 661 // Puts fft output data into a complex valued array. |
662 __inline static void StoreAsComplex(const float* data, | 662 static void StoreAsComplexNEON(const float* data, |
663 float data_complex[2][PART_LEN1]) { | 663 float data_complex[2][PART_LEN1]) { |
664 int i; | 664 int i; |
665 for (i = 0; i < PART_LEN; i += 4) { | 665 for (i = 0; i < PART_LEN; i += 4) { |
666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); | 666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); |
667 vst1q_f32(&data_complex[0][i], vec_data.val[0]); | 667 vst1q_f32(&data_complex[0][i], vec_data.val[0]); |
668 vst1q_f32(&data_complex[1][i], vec_data.val[1]); | 668 vst1q_f32(&data_complex[1][i], vec_data.val[1]); |
669 } | 669 } |
670 // fix beginning/end values | 670 // fix beginning/end values |
671 data_complex[1][0] = 0; | 671 data_complex[1][0] = 0; |
672 data_complex[1][PART_LEN] = 0; | 672 data_complex[1][PART_LEN] = 0; |
673 data_complex[0][0] = data[0]; | 673 data_complex[0][0] = data[0]; |
674 data_complex[0][PART_LEN] = data[1]; | 674 data_complex[0][PART_LEN] = data[1]; |
675 } | 675 } |
676 | 676 |
677 static void SubbandCoherenceNEON(AecCore* aec, | 677 static void SubbandCoherenceNEON(AecCore* aec, |
678 float efw[2][PART_LEN1], | 678 float efw[2][PART_LEN1], |
| 679 float dfw[2][PART_LEN1], |
679 float xfw[2][PART_LEN1], | 680 float xfw[2][PART_LEN1], |
680 float* fft, | 681 float* fft, |
681 float* cohde, | 682 float* cohde, |
682 float* cohxd) { | 683 float* cohxd) { |
683 float dfw[2][PART_LEN1]; | |
684 int i; | 684 int i; |
685 | 685 |
686 if (aec->delayEstCtr == 0) | |
687 aec->delayIdx = PartitionDelay(aec); | |
688 | |
689 // Use delayed far. | |
690 memcpy(xfw, | |
691 aec->xfwBuf + aec->delayIdx * PART_LEN1, | |
692 sizeof(xfw[0][0]) * 2 * PART_LEN1); | |
693 | |
694 // Windowed near fft | |
695 WindowData(fft, aec->dBuf); | |
696 aec_rdft_forward_128(fft); | |
697 StoreAsComplex(fft, dfw); | |
698 | |
699 // Windowed error fft | |
700 WindowData(fft, aec->eBuf); | |
701 aec_rdft_forward_128(fft); | |
702 StoreAsComplex(fft, efw); | |
703 | |
704 SmoothedPSD(aec, efw, dfw, xfw); | 686 SmoothedPSD(aec, efw, dfw, xfw); |
705 | 687 |
706 { | 688 { |
707 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); | 689 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); |
708 | 690 |
709 // Subband coherence | 691 // Subband coherence |
710 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 692 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
711 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); | 693 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); |
712 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); | 694 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); |
713 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); | 695 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); |
(...skipping 22 matching lines...) Expand all Loading... |
736 (aec->sx[i] * aec->sd[i] + 1e-10f); | 718 (aec->sx[i] * aec->sd[i] + 1e-10f); |
737 } | 719 } |
738 } | 720 } |
739 | 721 |
740 void WebRtcAec_InitAec_neon(void) { | 722 void WebRtcAec_InitAec_neon(void) { |
741 WebRtcAec_FilterFar = FilterFarNEON; | 723 WebRtcAec_FilterFar = FilterFarNEON; |
742 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 724 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
743 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 725 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
744 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 726 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
745 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 727 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
| 728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
| 729 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
| 730 WebRtcAec_WindowData = WindowDataNEON; |
746 } | 731 } |
OLD | NEW |