OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 /* | 11 /* |
12 * The core AEC algorithm, neon version of speed-critical functions. | 12 * The core AEC algorithm, neon version of speed-critical functions. |
13 * | 13 * |
14 * Based on aec_core_sse2.c. | 14 * Based on aec_core_sse2.c. |
15 */ | 15 */ |
16 | 16 |
17 #include <arm_neon.h> | 17 #include <arm_neon.h> |
18 #include <math.h> | 18 #include <math.h> |
19 #include <string.h> // memset | 19 #include <string.h> // memset |
20 | 20 |
21 extern "C" { | 21 extern "C" { |
22 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" | 22 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" |
23 } | 23 } |
24 #include "webrtc/modules/audio_processing/aec/aec_common.h" | 24 #include "webrtc/modules/audio_processing/aec/aec_common.h" |
25 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" | 25 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" |
26 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" | 26 #include "webrtc/modules/audio_processing/utility/ooura_fft.h" |
27 | 27 |
28 namespace webrtc { | 28 namespace webrtc { |
29 | 29 |
// Integer bit-shift constants; the code that uses them is elided from this view.
// NOTE(review): 23 equals the mantissa width of an IEEE-754 single-precision
// float, so kFloatExponentShift presumably positions the exponent field, and
// kShiftExponentIntoTopMantissa (8) presumably matches the exponent width --
// confirm against the functions that use them.
30 enum { kShiftExponentIntoTopMantissa = 8 }; | 30 enum { kShiftExponentIntoTopMantissa = 8 }; |
31 enum { kFloatExponentShift = 23 }; | 31 enum { kFloatExponentShift = 23 }; |
32 | 32 |
// Returns the real part of the complex product (aRe + i*aIm) * (bRe + i*bIm),
// i.e. aRe * bRe - aIm * bIm. Passing a negated aIm yields the real part of
// conj(a) * b, which is how the visible call in FilterAdaptationNEON uses it.
33 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 33 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
34 return aRe * bRe - aIm * bIm; | 34 return aRe * bRe - aIm * bIm; |
35 } | 35 } |
36 | 36 |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 ef[1][i] *= abs_ef; | 177 ef[1][i] *= abs_ef; |
178 } | 178 } |
179 | 179 |
180 // Stepsize factor | 180 // Stepsize factor |
181 ef[0][i] *= mu; | 181 ef[0][i] *= mu; |
182 ef[1][i] *= mu; | 182 ef[1][i] *= mu; |
183 } | 183 } |
184 } | 184 } |
185 | 185 |
186 static void FilterAdaptationNEON( | 186 static void FilterAdaptationNEON( |
| 187 const OouraFft& ooura_fft, |
187 int num_partitions, | 188 int num_partitions, |
188 int x_fft_buf_block_pos, | 189 int x_fft_buf_block_pos, |
189 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | 190 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], |
190 float e_fft[2][PART_LEN1], | 191 float e_fft[2][PART_LEN1], |
191 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { | 192 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { |
192 float fft[PART_LEN2]; | 193 float fft[PART_LEN2]; |
193 int i; | 194 int i; |
194 for (i = 0; i < num_partitions; i++) { | 195 for (i = 0; i < num_partitions; i++) { |
195 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; | 196 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; |
196 int pos = i * PART_LEN1; | 197 int pos = i * PART_LEN1; |
(...skipping 21 matching lines...) Expand all Loading... |
218 const float32x4x2_t g_n_h = vzipq_f32(e, f); | 219 const float32x4x2_t g_n_h = vzipq_f32(e, f); |
219 // Store | 220 // Store |
220 vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]); | 221 vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]); |
221 vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); | 222 vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); |
222 } | 223 } |
223 // ... and fixup the first imaginary entry. | 224 // ... and fixup the first imaginary entry. |
224 fft[1] = | 225 fft[1] = |
225 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], | 226 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], |
226 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); | 227 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); |
227 | 228 |
228 aec_rdft_inverse_128(fft); | 229 ooura_fft.InverseFft(fft); |
229 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); | 230 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); |
230 | 231 |
231 // fft scaling | 232 // fft scaling |
232 { | 233 { |
233 const float scale = 2.0f / PART_LEN2; | 234 const float scale = 2.0f / PART_LEN2; |
234 const float32x4_t scale_ps = vmovq_n_f32(scale); | 235 const float32x4_t scale_ps = vmovq_n_f32(scale); |
235 for (j = 0; j < PART_LEN; j += 4) { | 236 for (j = 0; j < PART_LEN; j += 4) { |
236 const float32x4_t fft_ps = vld1q_f32(&fft[j]); | 237 const float32x4_t fft_ps = vld1q_f32(&fft[j]); |
237 const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps); | 238 const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps); |
238 vst1q_f32(&fft[j], fft_scale); | 239 vst1q_f32(&fft[j], fft_scale); |
239 } | 240 } |
240 } | 241 } |
241 aec_rdft_forward_128(fft); | 242 ooura_fft.Fft(fft); |
242 | 243 |
243 { | 244 { |
244 const float wt1 = h_fft_buf[1][pos]; | 245 const float wt1 = h_fft_buf[1][pos]; |
245 h_fft_buf[0][pos + PART_LEN] += fft[1]; | 246 h_fft_buf[0][pos + PART_LEN] += fft[1]; |
246 for (j = 0; j < PART_LEN; j += 4) { | 247 for (j = 0; j < PART_LEN; j += 4) { |
247 float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); | 248 float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); |
248 float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); | 249 float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); |
249 const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); | 250 const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); |
250 const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); | 251 const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); |
251 const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); | 252 const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); |
(...skipping 475 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
727 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 728 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
728 WebRtcAec_Overdrive = OverdriveNEON; | 729 WebRtcAec_Overdrive = OverdriveNEON; |
729 WebRtcAec_Suppress = SuppressNEON; | 730 WebRtcAec_Suppress = SuppressNEON; |
730 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; | 731 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; |
731 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; | 732 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; |
732 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 733 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
733 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 734 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
734 WebRtcAec_WindowData = WindowDataNEON; | 735 WebRtcAec_WindowData = WindowDataNEON; |
735 } | 736 } |
736 } // namespace webrtc | 737 } // namespace webrtc |
OLD | NEW |