Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(309)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.c

Issue 1455163006: Ducking fix #1: Initial refactoring preparing for further AEC work (changes are bit-exact). (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fixed problem with variable declared inside loop statement. Created 5 years ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 // 115 //
116 // Note: The precision did not improve after 2 iterations. 116 // Note: The precision did not improve after 2 iterations.
117 for (i = 0; i < 2; i++) { 117 for (i = 0; i < 2; i++) {
118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x); 118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
119 } 119 }
120 // sqrt(s) = s * 1/sqrt(s) 120 // sqrt(s) = s * 1/sqrt(s)
121 return vmulq_f32(s, x);; 121 return vmulq_f32(s, x);;
122 } 122 }
123 #endif // WEBRTC_ARCH_ARM64 123 #endif // WEBRTC_ARCH_ARM64
124 124
125 static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { 125 static void ScaleErrorSignalNEON(int extended_filter_enabled,
126 const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; 126 float normal_mu,
127 const float error_threshold = aec->extended_filter_enabled ? 127 float normal_error_threshold,
128 kExtendedErrorThreshold : aec->normal_error_threshold; 128 float *x_pow,
129 float ef[2][PART_LEN1]) {
130 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
131 const float error_threshold = extended_filter_enabled ?
132 kExtendedErrorThreshold : normal_error_threshold;
129 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); 133 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
130 const float32x4_t kMu = vmovq_n_f32(mu); 134 const float32x4_t kMu = vmovq_n_f32(mu);
131 const float32x4_t kThresh = vmovq_n_f32(error_threshold); 135 const float32x4_t kThresh = vmovq_n_f32(error_threshold);
132 int i; 136 int i;
133 // vectorized code (four at once) 137 // vectorized code (four at once)
134 for (i = 0; i + 3 < PART_LEN1; i += 4) { 138 for (i = 0; i + 3 < PART_LEN1; i += 4) {
135 const float32x4_t xPow = vld1q_f32(&aec->xPow[i]); 139 const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]);
136 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); 140 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
137 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); 141 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
138 const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f); 142 const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f);
139 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); 143 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
140 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); 144 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
141 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); 145 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
142 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im); 146 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
143 const float32x4_t absEf = vsqrtq_f32(ef_sum2); 147 const float32x4_t absEf = vsqrtq_f32(ef_sum2);
144 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh); 148 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
145 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f); 149 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
146 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus); 150 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
147 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv)); 151 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
148 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv)); 152 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
149 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger), 153 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
150 vreinterpretq_u32_f32(ef_re)); 154 vreinterpretq_u32_f32(ef_re));
151 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger), 155 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
152 vreinterpretq_u32_f32(ef_im)); 156 vreinterpretq_u32_f32(ef_im));
153 ef_re_if = vandq_u32(bigger, ef_re_if); 157 ef_re_if = vandq_u32(bigger, ef_re_if);
154 ef_im_if = vandq_u32(bigger, ef_im_if); 158 ef_im_if = vandq_u32(bigger, ef_im_if);
155 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if); 159 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
156 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if); 160 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
157 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu); 161 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
158 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu); 162 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
159 vst1q_f32(&ef[0][i], ef_re); 163 vst1q_f32(&ef[0][i], ef_re);
160 vst1q_f32(&ef[1][i], ef_im); 164 vst1q_f32(&ef[1][i], ef_im);
161 } 165 }
162 // scalar code for the remaining items. 166 // scalar code for the remaining items.
163 for (; i < PART_LEN1; i++) { 167 for (; i < PART_LEN1; i++) {
164 float abs_ef; 168 float abs_ef;
165 ef[0][i] /= (aec->xPow[i] + 1e-10f); 169 ef[0][i] /= (x_pow[i] + 1e-10f);
166 ef[1][i] /= (aec->xPow[i] + 1e-10f); 170 ef[1][i] /= (x_pow[i] + 1e-10f);
167 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); 171 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
168 172
169 if (abs_ef > error_threshold) { 173 if (abs_ef > error_threshold) {
170 abs_ef = error_threshold / (abs_ef + 1e-10f); 174 abs_ef = error_threshold / (abs_ef + 1e-10f);
171 ef[0][i] *= abs_ef; 175 ef[0][i] *= abs_ef;
172 ef[1][i] *= abs_ef; 176 ef[1][i] *= abs_ef;
173 } 177 }
174 178
175 // Stepsize factor 179 // Stepsize factor
176 ef[0][i] *= mu; 180 ef[0][i] *= mu;
(...skipping 549 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 } 730 }
727 } 731 }
728 732
729 void WebRtcAec_InitAec_neon(void) { 733 void WebRtcAec_InitAec_neon(void) {
730 WebRtcAec_FilterFar = FilterFarNEON; 734 WebRtcAec_FilterFar = FilterFarNEON;
731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
732 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
734 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; 738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
735 } 739 }
736
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.c ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698