OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... | |
27 enum { kFloatExponentShift = 23 }; | 27 enum { kFloatExponentShift = 23 }; |
28 | 28 |
29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
30 return aRe * bRe - aIm * bIm; | 30 return aRe * bRe - aIm * bIm; |
31 } | 31 } |
32 | 32 |
33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { | 33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { |
34 return aRe * bIm + aIm * bRe; | 34 return aRe * bIm + aIm * bRe; |
35 } | 35 } |
36 | 36 |
37 static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { | 37 static void FilterFarNEON(int num_partitions, |
38 int xfBufBlockPos, | |
39 float xfBuf[2][kExtendedNumPartitions * PART_LEN1], | |
40 float wfBuf[2][kExtendedNumPartitions * PART_LEN1], | |
41 float yf[2][PART_LEN1]) { | |
38 int i; | 42 int i; |
39 const int num_partitions = aec->num_partitions; | 43 const int num_partitions_local = num_partitions; |
hlundin-webrtc
2015/11/20 10:59:40
You don't need the local variant, do you?
peah-webrtc
2015/11/24 06:48:56
I'm not 100% sure. It is const-declared, and this
| |
40 for (i = 0; i < num_partitions; i++) { | 44 for (i = 0; i < num_partitions_local; i++) { |
41 int j; | 45 int j; |
42 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; | 46 int xPos = (i + xfBufBlockPos) * PART_LEN1; |
43 int pos = i * PART_LEN1; | 47 int pos = i * PART_LEN1; |
44 // Check for wrap | 48 // Check for wrap |
45 if (i + aec->xfBufBlockPos >= num_partitions) { | 49 if (i + xfBufBlockPos >= num_partitions_local) { |
46 xPos -= num_partitions * PART_LEN1; | 50 xPos -= num_partitions_local * PART_LEN1; |
47 } | 51 } |
48 | 52 |
49 // vectorized code (four at once) | 53 // vectorized code (four at once) |
50 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 54 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
51 const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); | 55 const float32x4_t xfBuf_re = vld1q_f32(&xfBuf[0][xPos + j]); |
52 const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); | 56 const float32x4_t xfBuf_im = vld1q_f32(&xfBuf[1][xPos + j]); |
53 const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); | 57 const float32x4_t wfBuf_re = vld1q_f32(&wfBuf[0][pos + j]); |
54 const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); | 58 const float32x4_t wfBuf_im = vld1q_f32(&wfBuf[1][pos + j]); |
55 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); | 59 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); |
56 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); | 60 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); |
57 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); | 61 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); |
58 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); | 62 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); |
59 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); | 63 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); |
60 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); | 64 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); |
61 const float32x4_t g = vaddq_f32(yf_re, e); | 65 const float32x4_t g = vaddq_f32(yf_re, e); |
62 const float32x4_t h = vaddq_f32(yf_im, f); | 66 const float32x4_t h = vaddq_f32(yf_im, f); |
63 vst1q_f32(&yf[0][j], g); | 67 vst1q_f32(&yf[0][j], g); |
64 vst1q_f32(&yf[1][j], h); | 68 vst1q_f32(&yf[1][j], h); |
65 } | 69 } |
66 // scalar code for the remaining items. | 70 // scalar code for the remaining items. |
67 for (; j < PART_LEN1; j++) { | 71 for (; j < PART_LEN1; j++) { |
68 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], | 72 yf[0][j] += MulRe(xfBuf[0][xPos + j], |
69 aec->xfBuf[1][xPos + j], | 73 xfBuf[1][xPos + j], |
70 aec->wfBuf[0][pos + j], | 74 wfBuf[0][pos + j], |
71 aec->wfBuf[1][pos + j]); | 75 wfBuf[1][pos + j]); |
72 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], | 76 yf[1][j] += MulIm(xfBuf[0][xPos + j], |
73 aec->xfBuf[1][xPos + j], | 77 xfBuf[1][xPos + j], |
74 aec->wfBuf[0][pos + j], | 78 wfBuf[0][pos + j], |
75 aec->wfBuf[1][pos + j]); | 79 wfBuf[1][pos + j]); |
76 } | 80 } |
77 } | 81 } |
78 } | 82 } |
79 | 83 |
80 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. | 84 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. |
81 #if !defined (WEBRTC_ARCH_ARM64) | 85 #if !defined (WEBRTC_ARCH_ARM64) |
82 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { | 86 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { |
83 int i; | 87 int i; |
84 float32x4_t x = vrecpeq_f32(b); | 88 float32x4_t x = vrecpeq_f32(b); |
85 // from arm documentation | 89 // from arm documentation |
(...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
730 } | 734 } |
731 } | 735 } |
732 | 736 |
733 void WebRtcAec_InitAec_neon(void) { | 737 void WebRtcAec_InitAec_neon(void) { |
734 WebRtcAec_FilterFar = FilterFarNEON; | 738 WebRtcAec_FilterFar = FilterFarNEON; |
735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 739 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 740 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 741 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 742 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
739 } | 743 } |
OLD | NEW |