Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.c

Issue 1494563002: Refactoring (bitexact) of the EchoSuppressor in WebRTC AEC (#1) (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fixed error in the function header Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 435 matching lines...) Expand 10 before | Expand all | Expand 10 after
446 // Suppress error signal 446 // Suppress error signal
447 efw[0][i] *= hNl[i]; 447 efw[0][i] *= hNl[i];
448 efw[1][i] *= hNl[i]; 448 efw[1][i] *= hNl[i];
449 449
450 // Ooura fft returns incorrect sign on imaginary component. It matters 450 // Ooura fft returns incorrect sign on imaginary component. It matters
451 // here because we are making an additive change with comfort noise. 451 // here because we are making an additive change with comfort noise.
452 efw[1][i] *= -1; 452 efw[1][i] *= -1;
453 } 453 }
454 } 454 }
455 455
456 static int PartitionDelay(const AecCore* aec) { 456 static int PartitionDelayNEON(const AecCore* aec) {
457 // Measures the energy in each filter partition and returns the partition with 457 // Measures the energy in each filter partition and returns the partition with
458 // highest energy. 458 // highest energy.
459 // TODO(bjornv): Spread computational cost by computing one partition per 459 // TODO(bjornv): Spread computational cost by computing one partition per
460 // block? 460 // block?
461 float wfEnMax = 0; 461 float wfEnMax = 0;
462 int i; 462 int i;
463 int delay = 0; 463 int delay = 0;
464 464
465 for (i = 0; i < aec->num_partitions; i++) { 465 for (i = 0; i < aec->num_partitions; i++) {
466 int j; 466 int j;
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
631 631
632 if (aec->divergeState) 632 if (aec->divergeState)
633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); 633 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
634 634
635 // Reset if error is significantly larger than nearend (13 dB). 635 // Reset if error is significantly larger than nearend (13 dB).
636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) 636 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); 637 memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
638 } 638 }
639 639
640 // Window time domain data to be used by the fft. 640 // Window time domain data to be used by the fft.
641 __inline static void WindowData(float* x_windowed, const float* x) { 641 static void WindowDataNEON(float* x_windowed, const float* x) {
642 int i; 642 int i;
643 for (i = 0; i < PART_LEN; i += 4) { 643 for (i = 0; i < PART_LEN; i += 4) {
644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); 644 const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); 645 const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); 646 const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
647 // A B C D 647 // A B C D
648 float32x4_t vec_sqrtHanning_rev = 648 float32x4_t vec_sqrtHanning_rev =
649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); 649 vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
650 // B A D C 650 // B A D C
651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); 651 vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
652 // D C B A 652 // D C B A
653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), 653 vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
654 vget_low_f32(vec_sqrtHanning_rev)); 654 vget_low_f32(vec_sqrtHanning_rev));
655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); 655 vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
656 vst1q_f32(&x_windowed[PART_LEN + i], 656 vst1q_f32(&x_windowed[PART_LEN + i],
657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); 657 vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
658 } 658 }
659 } 659 }
660 660
661 // Puts fft output data into a complex valued array. 661 // Puts fft output data into a complex valued array.
662 __inline static void StoreAsComplex(const float* data, 662 static void StoreAsComplexNEON(const float* data,
663 float data_complex[2][PART_LEN1]) { 663 float data_complex[2][PART_LEN1]) {
664 int i; 664 int i;
665 for (i = 0; i < PART_LEN; i += 4) { 665 for (i = 0; i < PART_LEN; i += 4) {
666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); 666 const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
667 vst1q_f32(&data_complex[0][i], vec_data.val[0]); 667 vst1q_f32(&data_complex[0][i], vec_data.val[0]);
668 vst1q_f32(&data_complex[1][i], vec_data.val[1]); 668 vst1q_f32(&data_complex[1][i], vec_data.val[1]);
669 } 669 }
670 // fix beginning/end values 670 // fix beginning/end values
671 data_complex[1][0] = 0; 671 data_complex[1][0] = 0;
672 data_complex[1][PART_LEN] = 0; 672 data_complex[1][PART_LEN] = 0;
673 data_complex[0][0] = data[0]; 673 data_complex[0][0] = data[0];
674 data_complex[0][PART_LEN] = data[1]; 674 data_complex[0][PART_LEN] = data[1];
675 } 675 }
676 676
677 static void SubbandCoherenceNEON(AecCore* aec, 677 static void SubbandCoherenceNEON(AecCore* aec,
678 float efw[2][PART_LEN1], 678 float efw[2][PART_LEN1],
679 float dfw[2][PART_LEN1],
679 float xfw[2][PART_LEN1], 680 float xfw[2][PART_LEN1],
680 float* fft, 681 float* fft,
681 float* cohde, 682 float* cohde,
682 float* cohxd) { 683 float* cohxd) {
683 float dfw[2][PART_LEN1];
684 int i; 684 int i;
685 685
686 if (aec->delayEstCtr == 0)
687 aec->delayIdx = PartitionDelay(aec);
688
689 // Use delayed far.
690 memcpy(xfw,
691 aec->xfwBuf + aec->delayIdx * PART_LEN1,
692 sizeof(xfw[0][0]) * 2 * PART_LEN1);
693
694 // Windowed near fft
695 WindowData(fft, aec->dBuf);
696 aec_rdft_forward_128(fft);
697 StoreAsComplex(fft, dfw);
698
699 // Windowed error fft
700 WindowData(fft, aec->eBuf);
701 aec_rdft_forward_128(fft);
702 StoreAsComplex(fft, efw);
703
704 SmoothedPSD(aec, efw, dfw, xfw); 686 SmoothedPSD(aec, efw, dfw, xfw);
705 687
706 { 688 {
707 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); 689 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);
708 690
709 // Subband coherence 691 // Subband coherence
710 for (i = 0; i + 3 < PART_LEN1; i += 4) { 692 for (i = 0; i + 3 < PART_LEN1; i += 4) {
711 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); 693 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
712 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); 694 const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
713 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); 695 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
(...skipping 22 matching lines...) Expand all
736 (aec->sx[i] * aec->sd[i] + 1e-10f); 718 (aec->sx[i] * aec->sd[i] + 1e-10f);
737 } 719 }
738 } 720 }
739 721
740 void WebRtcAec_InitAec_neon(void) { 722 void WebRtcAec_InitAec_neon(void) {
741 WebRtcAec_FilterFar = FilterFarNEON; 723 WebRtcAec_FilterFar = FilterFarNEON;
742 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 724 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
743 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 725 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
744 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 726 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
745 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; 727 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
729 WebRtcAec_PartitionDelay = PartitionDelayNEON;
730 WebRtcAec_WindowData = WindowDataNEON;
746 } 731 }
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698