OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... |
27 enum { kFloatExponentShift = 23 }; | 27 enum { kFloatExponentShift = 23 }; |
28 | 28 |
29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
30 return aRe * bRe - aIm * bIm; | 30 return aRe * bRe - aIm * bIm; |
31 } | 31 } |
32 | 32 |
33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { | 33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { |
34 return aRe * bIm + aIm * bRe; | 34 return aRe * bIm + aIm * bRe; |
35 } | 35 } |
36 | 36 |
37 static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { | 37 static void FilterFarNEON(int num_partitions, |
| 38 int xfBufBlockPos, |
| 39 float xfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 40 float wfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 41 float yf[2][PART_LEN1]) { |
38 int i; | 42 int i; |
39 const int num_partitions = aec->num_partitions; | |
40 for (i = 0; i < num_partitions; i++) { | 43 for (i = 0; i < num_partitions; i++) { |
41 int j; | 44 int j; |
42 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; | 45 int xPos = (i + xfBufBlockPos) * PART_LEN1; |
43 int pos = i * PART_LEN1; | 46 int pos = i * PART_LEN1; |
44 // Check for wrap | 47 // Check for wrap |
45 if (i + aec->xfBufBlockPos >= num_partitions) { | 48 if (i + xfBufBlockPos >= num_partitions) { |
46 xPos -= num_partitions * PART_LEN1; | 49 xPos -= num_partitions * PART_LEN1; |
47 } | 50 } |
48 | 51 |
49 // vectorized code (four at once) | 52 // vectorized code (four at once) |
50 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 53 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
51 const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); | 54 const float32x4_t xfBuf_re = vld1q_f32(&xfBuf[0][xPos + j]); |
52 const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); | 55 const float32x4_t xfBuf_im = vld1q_f32(&xfBuf[1][xPos + j]); |
53 const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); | 56 const float32x4_t wfBuf_re = vld1q_f32(&wfBuf[0][pos + j]); |
54 const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); | 57 const float32x4_t wfBuf_im = vld1q_f32(&wfBuf[1][pos + j]); |
55 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); | 58 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); |
56 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); | 59 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); |
57 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); | 60 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); |
58 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); | 61 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); |
59 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); | 62 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); |
60 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); | 63 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); |
61 const float32x4_t g = vaddq_f32(yf_re, e); | 64 const float32x4_t g = vaddq_f32(yf_re, e); |
62 const float32x4_t h = vaddq_f32(yf_im, f); | 65 const float32x4_t h = vaddq_f32(yf_im, f); |
63 vst1q_f32(&yf[0][j], g); | 66 vst1q_f32(&yf[0][j], g); |
64 vst1q_f32(&yf[1][j], h); | 67 vst1q_f32(&yf[1][j], h); |
65 } | 68 } |
66 // scalar code for the remaining items. | 69 // scalar code for the remaining items. |
67 for (; j < PART_LEN1; j++) { | 70 for (; j < PART_LEN1; j++) { |
68 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], | 71 yf[0][j] += MulRe(xfBuf[0][xPos + j], |
69 aec->xfBuf[1][xPos + j], | 72 xfBuf[1][xPos + j], |
70 aec->wfBuf[0][pos + j], | 73 wfBuf[0][pos + j], |
71 aec->wfBuf[1][pos + j]); | 74 wfBuf[1][pos + j]); |
72 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], | 75 yf[1][j] += MulIm(xfBuf[0][xPos + j], |
73 aec->xfBuf[1][xPos + j], | 76 xfBuf[1][xPos + j], |
74 aec->wfBuf[0][pos + j], | 77 wfBuf[0][pos + j], |
75 aec->wfBuf[1][pos + j]); | 78 wfBuf[1][pos + j]); |
76 } | 79 } |
77 } | 80 } |
78 } | 81 } |
79 | 82 |
80 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. | 83 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. |
81 #if !defined (WEBRTC_ARCH_ARM64) | 84 #if !defined (WEBRTC_ARCH_ARM64) |
82 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { | 85 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { |
83 int i; | 86 int i; |
84 float32x4_t x = vrecpeq_f32(b); | 87 float32x4_t x = vrecpeq_f32(b); |
85 // from arm documentation | 88 // from arm documentation |
(...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
730 } | 733 } |
731 } | 734 } |
732 | 735 |
733 void WebRtcAec_InitAec_neon(void) { | 736 void WebRtcAec_InitAec_neon(void) { |
734 WebRtcAec_FilterFar = FilterFarNEON; | 737 WebRtcAec_FilterFar = FilterFarNEON; |
735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 738 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 739 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 740 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 741 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
739 } | 742 } |
OLD | NEW |