| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 446 // Suppress error signal | 446 // Suppress error signal |
| 447 efw[0][i] *= hNl[i]; | 447 efw[0][i] *= hNl[i]; |
| 448 efw[1][i] *= hNl[i]; | 448 efw[1][i] *= hNl[i]; |
| 449 | 449 |
| 450 // Ooura fft returns incorrect sign on imaginary component. It matters | 450 // Ooura fft returns incorrect sign on imaginary component. It matters |
| 451 // here because we are making an additive change with comfort noise. | 451 // here because we are making an additive change with comfort noise. |
| 452 efw[1][i] *= -1; | 452 efw[1][i] *= -1; |
| 453 } | 453 } |
| 454 } | 454 } |
| 455 | 455 |
| 456 static int PartitionDelay(const AecCore* aec) { | 456 static int PartitionDelayNEON(const AecCore* aec) { |
| 457 // Measures the energy in each filter partition and returns the partition with | 457 // Measures the energy in each filter partition and returns the partition with |
| 458 // highest energy. | 458 // highest energy. |
| 459 // TODO(bjornv): Spread computational cost by computing one partition per | 459 // TODO(bjornv): Spread computational cost by computing one partition per |
| 460 // block? | 460 // block? |
| 461 float wfEnMax = 0; | 461 float wfEnMax = 0; |
| 462 int i; | 462 int i; |
| 463 int delay = 0; | 463 int delay = 0; |
| 464 | 464 |
| 465 for (i = 0; i < aec->num_partitions; i++) { | 465 for (i = 0; i < aec->num_partitions; i++) { |
| 466 int j; | 466 int j; |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 631 | 631 |
| 632 if (aec->divergeState) | 632 if (aec->divergeState) |
| 633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); | 633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); |
| 634 | 634 |
| 635 // Reset if error is significantly larger than nearend (13 dB). | 635 // Reset if error is significantly larger than nearend (13 dB). |
| 636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) | 636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) |
| 637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); | 637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); |
| 638 } | 638 } |
| 639 | 639 |
| 640 // Window time domain data to be used by the fft. | 640 // Window time domain data to be used by the fft. |
| 641 __inline static void WindowData(float* x_windowed, const float* x) { | 641 static void WindowDataNEON(float* x_windowed, const float* x) { |
| 642 int i; | 642 int i; |
| 643 for (i = 0; i < PART_LEN; i += 4) { | 643 for (i = 0; i < PART_LEN; i += 4) { |
| 644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); | 644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); |
| 645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); | 645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); |
| 646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); | 646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); |
| 647 // A B C D | 647 // A B C D |
| 648 float32x4_t vec_sqrtHanning_rev = | 648 float32x4_t vec_sqrtHanning_rev = |
| 649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); | 649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); |
| 650 // B A D C | 650 // B A D C |
| 651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); | 651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); |
| 652 // D C B A | 652 // D C B A |
| 653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), | 653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), |
| 654 vget_low_f32(vec_sqrtHanning_rev)); | 654 vget_low_f32(vec_sqrtHanning_rev)); |
| 655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); | 655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); |
| 656 vst1q_f32(&x_windowed[PART_LEN + i], | 656 vst1q_f32(&x_windowed[PART_LEN + i], |
| 657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); | 657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); |
| 658 } | 658 } |
| 659 } | 659 } |
| 660 | 660 |
| 661 // Puts fft output data into a complex valued array. | 661 // Puts fft output data into a complex valued array. |
| 662 __inline static void StoreAsComplex(const float* data, | 662 static void StoreAsComplexNEON(const float* data, |
| 663 float data_complex[2][PART_LEN1]) { | 663 float data_complex[2][PART_LEN1]) { |
| 664 int i; | 664 int i; |
| 665 for (i = 0; i < PART_LEN; i += 4) { | 665 for (i = 0; i < PART_LEN; i += 4) { |
| 666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); | 666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); |
| 667 vst1q_f32(&data_complex[0][i], vec_data.val[0]); | 667 vst1q_f32(&data_complex[0][i], vec_data.val[0]); |
| 668 vst1q_f32(&data_complex[1][i], vec_data.val[1]); | 668 vst1q_f32(&data_complex[1][i], vec_data.val[1]); |
| 669 } | 669 } |
| 670 // fix beginning/end values | 670 // fix beginning/end values |
| 671 data_complex[1][0] = 0; | 671 data_complex[1][0] = 0; |
| 672 data_complex[1][PART_LEN] = 0; | 672 data_complex[1][PART_LEN] = 0; |
| 673 data_complex[0][0] = data[0]; | 673 data_complex[0][0] = data[0]; |
| 674 data_complex[0][PART_LEN] = data[1]; | 674 data_complex[0][PART_LEN] = data[1]; |
| 675 } | 675 } |
| 676 | 676 |
| 677 static void SubbandCoherenceNEON(AecCore* aec, | 677 static void SubbandCoherenceNEON(AecCore* aec, |
| 678 float efw[2][PART_LEN1], | 678 float efw[2][PART_LEN1], |
| 679 float dfw[2][PART_LEN1], |
| 679 float xfw[2][PART_LEN1], | 680 float xfw[2][PART_LEN1], |
| 680 float* fft, | 681 float* fft, |
| 681 float* cohde, | 682 float* cohde, |
| 682 float* cohxd) { | 683 float* cohxd) { |
| 683 float dfw[2][PART_LEN1]; | |
| 684 int i; | 684 int i; |
| 685 | 685 |
| 686 if (aec->delayEstCtr == 0) | |
| 687 aec->delayIdx = PartitionDelay(aec); | |
| 688 | |
| 689 // Use delayed far. | |
| 690 memcpy(xfw, | |
| 691 aec->xfwBuf + aec->delayIdx * PART_LEN1, | |
| 692 sizeof(xfw[0][0]) * 2 * PART_LEN1); | |
| 693 | |
| 694 // Windowed near fft | |
| 695 WindowData(fft, aec->dBuf); | |
| 696 aec_rdft_forward_128(fft); | |
| 697 StoreAsComplex(fft, dfw); | |
| 698 | |
| 699 // Windowed error fft | |
| 700 WindowData(fft, aec->eBuf); | |
| 701 aec_rdft_forward_128(fft); | |
| 702 StoreAsComplex(fft, efw); | |
| 703 | |
| 704 SmoothedPSD(aec, efw, dfw, xfw); | 686 SmoothedPSD(aec, efw, dfw, xfw); |
| 705 | 687 |
| 706 { | 688 { |
| 707 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); | 689 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); |
| 708 | 690 |
| 709 // Subband coherence | 691 // Subband coherence |
| 710 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 692 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 711 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); | 693 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); |
| 712 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); | 694 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); |
| 713 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); | 695 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); |
| (...skipping 22 matching lines...) Expand all Loading... |
| 736 (aec->sx[i] * aec->sd[i] + 1e-10f); | 718 (aec->sx[i] * aec->sd[i] + 1e-10f); |
| 737 } | 719 } |
| 738 } | 720 } |
| 739 | 721 |
| 740 void WebRtcAec_InitAec_neon(void) { | 722 void WebRtcAec_InitAec_neon(void) { |
| 741 WebRtcAec_FilterFar = FilterFarNEON; | 723 WebRtcAec_FilterFar = FilterFarNEON; |
| 742 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 724 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
| 743 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 725 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
| 744 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 726 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
| 745 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 727 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
| 728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
| 729 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
| 730 WebRtcAec_WindowData = WindowDataNEON; |
| 746 } | 731 } |
| OLD | NEW |