Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(162)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.c

Issue 1494563002: Refactoring (bitexact) of the EchoSuppressor in WebRTC AEC (#1) (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fixed error in the function header Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 421 matching lines...) Expand 10 before | Expand all | Expand 10 after
432 } 432 }
433 } 433 }
434 434
435 __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { 435 __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
436 // A+B C+D 436 // A+B C+D
437 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2))); 437 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
438 // A+B+C+D A+B+C+D 438 // A+B+C+D A+B+C+D
439 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); 439 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
440 _mm_store_ss(dst, sum); 440 _mm_store_ss(dst, sum);
441 } 441 }
442 static int PartitionDelay(const AecCore* aec) { 442
443 static int PartitionDelaySSE2(const AecCore* aec) {
443 // Measures the energy in each filter partition and returns the partition with 444 // Measures the energy in each filter partition and returns the partition with
444 // highest energy. 445 // highest energy.
445 // TODO(bjornv): Spread computational cost by computing one partition per 446 // TODO(bjornv): Spread computational cost by computing one partition per
446 // block? 447 // block?
447 float wfEnMax = 0; 448 float wfEnMax = 0;
448 int i; 449 int i;
449 int delay = 0; 450 int delay = 0;
450 451
451 for (i = 0; i < aec->num_partitions; i++) { 452 for (i = 0; i < aec->num_partitions; i++) {
452 int j; 453 int j;
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
612 613
613 if (aec->divergeState) 614 if (aec->divergeState)
614 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); 615 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
615 616
616 // Reset if error is significantly larger than nearend (13 dB). 617 // Reset if error is significantly larger than nearend (13 dB).
617 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) 618 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
618 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); 619 memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
619 } 620 }
620 621
621 // Window time domain data to be used by the fft. 622 // Window time domain data to be used by the fft.
622 __inline static void WindowData(float* x_windowed, const float* x) { 623 static void WindowDataSSE2(float* x_windowed, const float* x) {
623 int i; 624 int i;
624 for (i = 0; i < PART_LEN; i += 4) { 625 for (i = 0; i < PART_LEN; i += 4) {
625 const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); 626 const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
626 const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]); 627 const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
627 const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]); 628 const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
628 // A B C D 629 // A B C D
629 __m128 vec_sqrtHanning_rev = 630 __m128 vec_sqrtHanning_rev =
630 _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); 631 _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
631 // D C B A 632 // D C B A
632 vec_sqrtHanning_rev = 633 vec_sqrtHanning_rev =
633 _mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev, 634 _mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
634 _MM_SHUFFLE(0, 1, 2, 3)); 635 _MM_SHUFFLE(0, 1, 2, 3));
635 _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning)); 636 _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
636 _mm_storeu_ps(&x_windowed[PART_LEN + i], 637 _mm_storeu_ps(&x_windowed[PART_LEN + i],
637 _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev)); 638 _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
638 } 639 }
639 } 640 }
640 641
641 // Puts fft output data into a complex valued array. 642 // Puts fft output data into a complex valued array.
642 __inline static void StoreAsComplex(const float* data, 643 static void StoreAsComplexSSE2(const float* data,
643 float data_complex[2][PART_LEN1]) { 644 float data_complex[2][PART_LEN1]) {
644 int i; 645 int i;
645 for (i = 0; i < PART_LEN; i += 4) { 646 for (i = 0; i < PART_LEN; i += 4) {
646 const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); 647 const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
647 const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]); 648 const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]);
648 const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4, 649 const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4,
649 _MM_SHUFFLE(2, 0, 2, 0)); 650 _MM_SHUFFLE(2, 0, 2, 0));
650 const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4, 651 const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4,
651 _MM_SHUFFLE(3, 1, 3, 1)); 652 _MM_SHUFFLE(3, 1, 3, 1));
652 _mm_storeu_ps(&data_complex[0][i], vec_a); 653 _mm_storeu_ps(&data_complex[0][i], vec_a);
653 _mm_storeu_ps(&data_complex[1][i], vec_b); 654 _mm_storeu_ps(&data_complex[1][i], vec_b);
654 } 655 }
655 // fix beginning/end values 656 // fix beginning/end values
656 data_complex[1][0] = 0; 657 data_complex[1][0] = 0;
657 data_complex[1][PART_LEN] = 0; 658 data_complex[1][PART_LEN] = 0;
658 data_complex[0][0] = data[0]; 659 data_complex[0][0] = data[0];
659 data_complex[0][PART_LEN] = data[1]; 660 data_complex[0][PART_LEN] = data[1];
660 } 661 }
661 662
662 static void SubbandCoherenceSSE2(AecCore* aec, 663 static void SubbandCoherenceSSE2(AecCore* aec,
663 float efw[2][PART_LEN1], 664 float efw[2][PART_LEN1],
665 float dfw[2][PART_LEN1],
664 float xfw[2][PART_LEN1], 666 float xfw[2][PART_LEN1],
665 float* fft, 667 float* fft,
666 float* cohde, 668 float* cohde,
667 float* cohxd) { 669 float* cohxd) {
668 float dfw[2][PART_LEN1];
669 int i; 670 int i;
670 671
671 if (aec->delayEstCtr == 0)
672 aec->delayIdx = PartitionDelay(aec);
673
674 // Use delayed far.
675 memcpy(xfw,
676 aec->xfwBuf + aec->delayIdx * PART_LEN1,
677 sizeof(xfw[0][0]) * 2 * PART_LEN1);
678
679 // Windowed near fft
680 WindowData(fft, aec->dBuf);
681 aec_rdft_forward_128(fft);
682 StoreAsComplex(fft, dfw);
683
684 // Windowed error fft
685 WindowData(fft, aec->eBuf);
686 aec_rdft_forward_128(fft);
687 StoreAsComplex(fft, efw);
688
689 SmoothedPSD(aec, efw, dfw, xfw); 672 SmoothedPSD(aec, efw, dfw, xfw);
690 673
691 { 674 {
692 const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); 675 const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
693 676
694 // Subband coherence 677 // Subband coherence
695 for (i = 0; i + 3 < PART_LEN1; i += 4) { 678 for (i = 0; i + 3 < PART_LEN1; i += 4) {
696 const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]); 679 const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
697 const __m128 vec_se = _mm_loadu_ps(&aec->se[i]); 680 const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
698 const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]); 681 const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
733 } 716 }
734 } 717 }
735 } 718 }
736 719
737 void WebRtcAec_InitAec_SSE2(void) { 720 void WebRtcAec_InitAec_SSE2(void) {
738 WebRtcAec_FilterFar = FilterFarSSE2; 721 WebRtcAec_FilterFar = FilterFarSSE2;
739 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 722 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
740 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 723 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
741 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 724 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
742 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 725 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
726 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
727 WebRtcAec_PartitionDelay = PartitionDelaySSE2;
728 WebRtcAec_WindowData = WindowDataSSE2;
743 } 729 }
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.c ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698