Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(424)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 2348213002: Move the aec_rdft* files to a more proper place beneath APM and make them thread-safe. (Closed)
Patch Set: Rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 /* 11 /*
12 * The core AEC algorithm, neon version of speed-critical functions. 12 * The core AEC algorithm, neon version of speed-critical functions.
13 * 13 *
14 * Based on aec_core_sse2.c. 14 * Based on aec_core_sse2.c.
15 */ 15 */
16 16
17 #include <arm_neon.h> 17 #include <arm_neon.h>
18 #include <math.h> 18 #include <math.h>
19 #include <string.h> // memset 19 #include <string.h> // memset
20 20
21 extern "C" { 21 extern "C" {
22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
23 } 23 }
24 #include "webrtc/modules/audio_processing/aec/aec_common.h" 24 #include "webrtc/modules/audio_processing/aec/aec_common.h"
25 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" 25 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
26 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 26 #include "webrtc/modules/audio_processing/utility/ooura_fft.h"
27 27
28 namespace webrtc { 28 namespace webrtc {
29 29
30 enum { kShiftExponentIntoTopMantissa = 8 }; 30 enum { kShiftExponentIntoTopMantissa = 8 };
31 enum { kFloatExponentShift = 23 }; 31 enum { kFloatExponentShift = 23 };
32 32
33 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 33 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
34 return aRe * bRe - aIm * bIm; 34 return aRe * bRe - aIm * bIm;
35 } 35 }
36 36
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 ef[1][i] *= abs_ef; 177 ef[1][i] *= abs_ef;
178 } 178 }
179 179
180 // Stepsize factor 180 // Stepsize factor
181 ef[0][i] *= mu; 181 ef[0][i] *= mu;
182 ef[1][i] *= mu; 182 ef[1][i] *= mu;
183 } 183 }
184 } 184 }
185 185
186 static void FilterAdaptationNEON( 186 static void FilterAdaptationNEON(
187 const OouraFft& ooura_fft,
187 int num_partitions, 188 int num_partitions,
188 int x_fft_buf_block_pos, 189 int x_fft_buf_block_pos,
189 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 190 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
190 float e_fft[2][PART_LEN1], 191 float e_fft[2][PART_LEN1],
191 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { 192 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
192 float fft[PART_LEN2]; 193 float fft[PART_LEN2];
193 int i; 194 int i;
194 for (i = 0; i < num_partitions; i++) { 195 for (i = 0; i < num_partitions; i++) {
195 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; 196 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
196 int pos = i * PART_LEN1; 197 int pos = i * PART_LEN1;
(...skipping 21 matching lines...) Expand all
218 const float32x4x2_t g_n_h = vzipq_f32(e, f); 219 const float32x4x2_t g_n_h = vzipq_f32(e, f);
219 // Store 220 // Store
220 vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]); 221 vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
221 vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); 222 vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
222 } 223 }
223 // ... and fixup the first imaginary entry. 224 // ... and fixup the first imaginary entry.
224 fft[1] = 225 fft[1] =
225 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], 226 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN],
226 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); 227 e_fft[0][PART_LEN], e_fft[1][PART_LEN]);
227 228
228 aec_rdft_inverse_128(fft); 229 ooura_fft.InverseFft(fft);
229 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 230 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
230 231
231 // fft scaling 232 // fft scaling
232 { 233 {
233 const float scale = 2.0f / PART_LEN2; 234 const float scale = 2.0f / PART_LEN2;
234 const float32x4_t scale_ps = vmovq_n_f32(scale); 235 const float32x4_t scale_ps = vmovq_n_f32(scale);
235 for (j = 0; j < PART_LEN; j += 4) { 236 for (j = 0; j < PART_LEN; j += 4) {
236 const float32x4_t fft_ps = vld1q_f32(&fft[j]); 237 const float32x4_t fft_ps = vld1q_f32(&fft[j]);
237 const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps); 238 const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
238 vst1q_f32(&fft[j], fft_scale); 239 vst1q_f32(&fft[j], fft_scale);
239 } 240 }
240 } 241 }
241 aec_rdft_forward_128(fft); 242 ooura_fft.Fft(fft);
242 243
243 { 244 {
244 const float wt1 = h_fft_buf[1][pos]; 245 const float wt1 = h_fft_buf[1][pos];
245 h_fft_buf[0][pos + PART_LEN] += fft[1]; 246 h_fft_buf[0][pos + PART_LEN] += fft[1];
246 for (j = 0; j < PART_LEN; j += 4) { 247 for (j = 0; j < PART_LEN; j += 4) {
247 float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); 248 float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]);
248 float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); 249 float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]);
249 const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); 250 const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
250 const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); 251 const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
251 const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); 252 const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
(...skipping 475 matching lines...) Expand 10 before | Expand all | Expand 10 after
727 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 728 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
728 WebRtcAec_Overdrive = OverdriveNEON; 729 WebRtcAec_Overdrive = OverdriveNEON;
729 WebRtcAec_Suppress = SuppressNEON; 730 WebRtcAec_Suppress = SuppressNEON;
730 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; 731 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;
731 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; 732 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;
732 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; 733 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
733 WebRtcAec_PartitionDelay = PartitionDelayNEON; 734 WebRtcAec_PartitionDelay = PartitionDelayNEON;
734 WebRtcAec_WindowData = WindowDataNEON; 735 WebRtcAec_WindowData = WindowDataNEON;
735 } 736 }
736 } // namespace webrtc 737 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.cc ('k') | webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698