webrtc/modules/audio_processing/aec/aec_core_neon.c - Issue 1455163006: Ducking fix #1: Initial refactoring preparing for further AEC work (changes are bitexact).

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.c

Issue 1455163006: Ducking fix #1: Initial refactoring preparing for further AEC work (changes are bitexact). (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Fixed problem with variable declared inside loop statement. Created 5 years ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 //	115 //

116 // Note: The precision did not improve after 2 iterations.	116 // Note: The precision did not improve after 2 iterations.

117 for (i = 0; i < 2; i++) {	117 for (i = 0; i < 2; i++) {

118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);	118 x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);

119 }	119 }

120 // sqrt(s) = s * 1/sqrt(s)	120 // sqrt(s) = s * 1/sqrt(s)

121 return vmulq_f32(s, x);;	121 return vmulq_f32(s, x);;

122 }	122 }

123 #endif // WEBRTC_ARCH_ARM64	123 #endif // WEBRTC_ARCH_ARM64

124	124

125 static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {	125 static void ScaleErrorSignalNEON(int extended_filter_enabled,

126 const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;	126 float normal_mu,

127 const float error_threshold = aec->extended_filter_enabled ?	127 float normal_error_threshold,

128 kExtendedErrorThreshold : aec->normal_error_threshold;	128 float *x_pow,

	129 float ef[2][PART_LEN1]) {

	130 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;

	131 const float error_threshold = extended_filter_enabled ?

	132 kExtendedErrorThreshold : normal_error_threshold;

129 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);	133 const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);

130 const float32x4_t kMu = vmovq_n_f32(mu);	134 const float32x4_t kMu = vmovq_n_f32(mu);

131 const float32x4_t kThresh = vmovq_n_f32(error_threshold);	135 const float32x4_t kThresh = vmovq_n_f32(error_threshold);

132 int i;	136 int i;

133 // vectorized code (four at once)	137 // vectorized code (four at once)

134 for (i = 0; i + 3 < PART_LEN1; i += 4) {	138 for (i = 0; i + 3 < PART_LEN1; i += 4) {

135 const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);	139 const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]);

136 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);	140 const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);

137 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);	141 const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);

138 const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);	142 const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f);

139 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);	143 float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);

140 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);	144 float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);

141 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);	145 const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);

142 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);	146 const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);

143 const float32x4_t absEf = vsqrtq_f32(ef_sum2);	147 const float32x4_t absEf = vsqrtq_f32(ef_sum2);

144 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);	148 const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);

145 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);	149 const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);

146 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);	150 const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);

147 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));	151 uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));

148 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));	152 uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));

149 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),	153 uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),

150 vreinterpretq_u32_f32(ef_re));	154 vreinterpretq_u32_f32(ef_re));

151 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),	155 uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),

152 vreinterpretq_u32_f32(ef_im));	156 vreinterpretq_u32_f32(ef_im));

153 ef_re_if = vandq_u32(bigger, ef_re_if);	157 ef_re_if = vandq_u32(bigger, ef_re_if);

154 ef_im_if = vandq_u32(bigger, ef_im_if);	158 ef_im_if = vandq_u32(bigger, ef_im_if);

155 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);	159 ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);

156 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);	160 ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);

157 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);	161 ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);

158 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);	162 ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);

159 vst1q_f32(&ef[0][i], ef_re);	163 vst1q_f32(&ef[0][i], ef_re);

160 vst1q_f32(&ef[1][i], ef_im);	164 vst1q_f32(&ef[1][i], ef_im);

161 }	165 }

162 // scalar code for the remaining items.	166 // scalar code for the remaining items.

163 for (; i < PART_LEN1; i++) {	167 for (; i < PART_LEN1; i++) {

164 float abs_ef;	168 float abs_ef;

165 ef[0][i] /= (aec->xPow[i] + 1e-10f);	169 ef[0][i] /= (x_pow[i] + 1e-10f);

166 ef[1][i] /= (aec->xPow[i] + 1e-10f);	170 ef[1][i] /= (x_pow[i] + 1e-10f);

167 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);	171 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

168	172

169 if (abs_ef > error_threshold) {	173 if (abs_ef > error_threshold) {

170 abs_ef = error_threshold / (abs_ef + 1e-10f);	174 abs_ef = error_threshold / (abs_ef + 1e-10f);

171 ef[0][i] *= abs_ef;	175 ef[0][i] *= abs_ef;

172 ef[1][i] *= abs_ef;	176 ef[1][i] *= abs_ef;

173 }	177 }

174	178

175 // Stepsize factor	179 // Stepsize factor

176 ef[0][i] *= mu;	180 ef[0][i] *= mu;

(...skipping 549 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 }	730 }

727 }	731 }

728	732

729 void WebRtcAec_InitAec_neon(void) {	733 void WebRtcAec_InitAec_neon(void) {

730 WebRtcAec_FilterFar = FilterFarNEON;	734 WebRtcAec_FilterFar = FilterFarNEON;

731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;	735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;

732 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;	736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;

733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;	737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;

734 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;	738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;

735 }	739 }

736

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.c ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.c » ('j') | no next file with comments »