OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
115 // | 115 // |
116 // Note: The precision did not improve after 2 iterations. | 116 // Note: The precision did not improve after 2 iterations. |
117 for (i = 0; i < 2; i++) { | 117 for (i = 0; i < 2; i++) { |
118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x); | 118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x); |
119 } | 119 } |
120 // sqrt(s) = s * 1/sqrt(s) | 120 // sqrt(s) = s * 1/sqrt(s) |
121 return vmulq_f32(s, x);; | 121 return vmulq_f32(s, x);; |
122 } | 122 } |
123 #endif // WEBRTC_ARCH_ARM64 | 123 #endif // WEBRTC_ARCH_ARM64 |
124 | 124 |
125 static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { | 125 static void ScaleErrorSignalNEON(int extended_filter_enabled, |
126 const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; | 126 float normal_mu, |
127 const float error_threshold = aec->extended_filter_enabled ? | 127 float normal_error_threshold, |
128 kExtendedErrorThreshold : aec->normal_error_threshold; | 128 float *x_pow, |
| 129 float ef[2][PART_LEN1]) { |
| 130 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; |
| 131 const float error_threshold = extended_filter_enabled ? |
| 132 kExtendedErrorThreshold : normal_error_threshold; |
129 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); | 133 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); |
130 const float32x4_t kMu = vmovq_n_f32(mu); | 134 const float32x4_t kMu = vmovq_n_f32(mu); |
131 const float32x4_t kThresh = vmovq_n_f32(error_threshold); | 135 const float32x4_t kThresh = vmovq_n_f32(error_threshold); |
132 int i; | 136 int i; |
133 // vectorized code (four at once) | 137 // vectorized code (four at once) |
134 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 138 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
135 const float32x4_t xPow = vld1q_f32(&aec->xPow[i]); | 139 const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]); |
136 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); | 140 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); |
137 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); | 141 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); |
138 const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f); | 142 const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f); |
139 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); | 143 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); |
140 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); | 144 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); |
141 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); | 145 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); |
142 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im); | 146 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im); |
143 const float32x4_t absEf = vsqrtq_f32(ef_sum2); | 147 const float32x4_t absEf = vsqrtq_f32(ef_sum2); |
144 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh); | 148 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh); |
145 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f); | 149 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f); |
146 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus); | 150 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus); |
147 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv)); | 151 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv)); |
148 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv)); | 152 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv)); |
149 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger), | 153 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger), |
150 vreinterpretq_u32_f32(ef_re)); | 154 vreinterpretq_u32_f32(ef_re)); |
151 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger), | 155 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger), |
152 vreinterpretq_u32_f32(ef_im)); | 156 vreinterpretq_u32_f32(ef_im)); |
153 ef_re_if = vandq_u32(bigger, ef_re_if); | 157 ef_re_if = vandq_u32(bigger, ef_re_if); |
154 ef_im_if = vandq_u32(bigger, ef_im_if); | 158 ef_im_if = vandq_u32(bigger, ef_im_if); |
155 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if); | 159 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if); |
156 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if); | 160 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if); |
157 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu); | 161 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu); |
158 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu); | 162 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu); |
159 vst1q_f32(&ef[0][i], ef_re); | 163 vst1q_f32(&ef[0][i], ef_re); |
160 vst1q_f32(&ef[1][i], ef_im); | 164 vst1q_f32(&ef[1][i], ef_im); |
161 } | 165 } |
162 // scalar code for the remaining items. | 166 // scalar code for the remaining items. |
163 for (; i < PART_LEN1; i++) { | 167 for (; i < PART_LEN1; i++) { |
164 float abs_ef; | 168 float abs_ef; |
165 ef[0][i] /= (aec->xPow[i] + 1e-10f); | 169 ef[0][i] /= (x_pow[i] + 1e-10f); |
166 ef[1][i] /= (aec->xPow[i] + 1e-10f); | 170 ef[1][i] /= (x_pow[i] + 1e-10f); |
167 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); | 171 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); |
168 | 172 |
169 if (abs_ef > error_threshold) { | 173 if (abs_ef > error_threshold) { |
170 abs_ef = error_threshold / (abs_ef + 1e-10f); | 174 abs_ef = error_threshold / (abs_ef + 1e-10f); |
171 ef[0][i] *= abs_ef; | 175 ef[0][i] *= abs_ef; |
172 ef[1][i] *= abs_ef; | 176 ef[1][i] *= abs_ef; |
173 } | 177 } |
174 | 178 |
175 // Stepsize factor | 179 // Stepsize factor |
176 ef[0][i] *= mu; | 180 ef[0][i] *= mu; |
(...skipping 549 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
726 } | 730 } |
727 } | 731 } |
728 | 732 |
729 void WebRtcAec_InitAec_neon(void) { | 733 void WebRtcAec_InitAec_neon(void) { |
730 WebRtcAec_FilterFar = FilterFarNEON; | 734 WebRtcAec_FilterFar = FilterFarNEON; |
731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
732 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
734 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
735 } | 739 } |
736 | |
OLD | NEW |