Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(205)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.cc

Issue 2348213002: Move the aec_rdft* files to a more proper place beneath APM and make them thread-safe. (Closed)
Patch Set: Rebase Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 /* 11 /*
12 * The core AEC algorithm, SSE2 version of speed-critical functions. 12 * The core AEC algorithm, SSE2 version of speed-critical functions.
13 */ 13 */
14 14
15 #include <emmintrin.h> 15 #include <emmintrin.h>
16 #include <math.h> 16 #include <math.h>
17 #include <string.h> // memset 17 #include <string.h> // memset
18 18
19 extern "C" { 19 extern "C" {
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
21 } 21 }
22 #include "webrtc/modules/audio_processing/aec/aec_common.h" 22 #include "webrtc/modules/audio_processing/aec/aec_common.h"
23 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" 23 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h"
24 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 24 #include "webrtc/modules/audio_processing/utility/ooura_fft.h"
25 25
26 namespace webrtc { 26 namespace webrtc {
27 27
28 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 28 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
29 return aRe * bRe - aIm * bIm; 29 return aRe * bRe - aIm * bIm;
30 } 30 }
31 31
32 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 32 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
33 return aRe * bIm + aIm * bRe; 33 return aRe * bIm + aIm * bRe;
34 } 34 }
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 } 133 }
134 134
135 // Stepsize factor 135 // Stepsize factor
136 ef[0][i] *= mu; 136 ef[0][i] *= mu;
137 ef[1][i] *= mu; 137 ef[1][i] *= mu;
138 } 138 }
139 } 139 }
140 } 140 }
141 141
142 static void FilterAdaptationSSE2( 142 static void FilterAdaptationSSE2(
143 const OouraFft& ooura_fft,
143 int num_partitions, 144 int num_partitions,
144 int x_fft_buf_block_pos, 145 int x_fft_buf_block_pos,
145 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 146 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
146 float e_fft[2][PART_LEN1], 147 float e_fft[2][PART_LEN1],
147 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { 148 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
148 float fft[PART_LEN2]; 149 float fft[PART_LEN2];
149 int i, j; 150 int i, j;
150 for (i = 0; i < num_partitions; i++) { 151 for (i = 0; i < num_partitions; i++) {
151 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); 152 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
152 int pos = i * PART_LEN1; 153 int pos = i * PART_LEN1;
(...skipping 23 matching lines...) Expand all
176 const __m128 h = _mm_unpackhi_ps(e, f); 177 const __m128 h = _mm_unpackhi_ps(e, f);
177 // Store 178 // Store
178 _mm_storeu_ps(&fft[2 * j + 0], g); 179 _mm_storeu_ps(&fft[2 * j + 0], g);
179 _mm_storeu_ps(&fft[2 * j + 4], h); 180 _mm_storeu_ps(&fft[2 * j + 4], h);
180 } 181 }
181 // ... and fixup the first imaginary entry. 182 // ... and fixup the first imaginary entry.
182 fft[1] = 183 fft[1] =
183 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], 184 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN],
184 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); 185 e_fft[0][PART_LEN], e_fft[1][PART_LEN]);
185 186
186 aec_rdft_inverse_128(fft); 187 ooura_fft.InverseFft(fft);
187 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 188 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
188 189
189 // fft scaling 190 // fft scaling
190 { 191 {
191 float scale = 2.0f / PART_LEN2; 192 float scale = 2.0f / PART_LEN2;
192 const __m128 scale_ps = _mm_load_ps1(&scale); 193 const __m128 scale_ps = _mm_load_ps1(&scale);
193 for (j = 0; j < PART_LEN; j += 4) { 194 for (j = 0; j < PART_LEN; j += 4) {
194 const __m128 fft_ps = _mm_loadu_ps(&fft[j]); 195 const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
195 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); 196 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
196 _mm_storeu_ps(&fft[j], fft_scale); 197 _mm_storeu_ps(&fft[j], fft_scale);
197 } 198 }
198 } 199 }
199 aec_rdft_forward_128(fft); 200 ooura_fft.Fft(fft);
200 201
201 { 202 {
202 float wt1 = h_fft_buf[1][pos]; 203 float wt1 = h_fft_buf[1][pos];
203 h_fft_buf[0][pos + PART_LEN] += fft[1]; 204 h_fft_buf[0][pos + PART_LEN] += fft[1];
204 for (j = 0; j < PART_LEN; j += 4) { 205 for (j = 0; j < PART_LEN; j += 4) {
205 __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); 206 __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
206 __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); 207 __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
207 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); 208 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
208 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); 209 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
209 const __m128 fft_re = 210 const __m128 fft_re =
(...skipping 531 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 742 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
742 WebRtcAec_Overdrive = OverdriveSSE2; 743 WebRtcAec_Overdrive = OverdriveSSE2;
743 WebRtcAec_Suppress = SuppressSSE2; 744 WebRtcAec_Suppress = SuppressSSE2;
744 WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; 745 WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2;
745 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; 746 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2;
746 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; 747 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
747 WebRtcAec_PartitionDelay = PartitionDelaySSE2; 748 WebRtcAec_PartitionDelay = PartitionDelaySSE2;
748 WebRtcAec_WindowData = WindowDataSSE2; 749 WebRtcAec_WindowData = WindowDataSSE2;
749 } 750 }
750 } // namespace webrtc 751 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h ('k') | webrtc/modules/audio_processing/aec/aec_rdft.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698