OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 /* | 11 /* |
12 * The core AEC algorithm, SSE2 version of speed-critical functions. | 12 * The core AEC algorithm, SSE2 version of speed-critical functions. |
13 */ | 13 */ |
14 | 14 |
15 #include <emmintrin.h> | 15 #include <emmintrin.h> |
16 #include <math.h> | 16 #include <math.h> |
17 #include <string.h> // memset | 17 #include <string.h> // memset |
18 | 18 |
19 extern "C" { | 19 extern "C" { |
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" | 20 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" |
21 } | 21 } |
22 #include "webrtc/modules/audio_processing/aec/aec_common.h" | 22 #include "webrtc/modules/audio_processing/aec/aec_common.h" |
23 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" | 23 #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" |
24 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" | 24 #include "webrtc/modules/audio_processing/utility/ooura_fft.h" |
25 | 25 |
26 namespace webrtc { | 26 namespace webrtc { |
27 | 27 |
28 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 28 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
29 return aRe * bRe - aIm * bIm; | 29 return aRe * bRe - aIm * bIm; |
30 } | 30 } |
31 | 31 |
32 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { | 32 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { |
33 return aRe * bIm + aIm * bRe; | 33 return aRe * bIm + aIm * bRe; |
34 } | 34 } |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 } | 133 } |
134 | 134 |
135 // Stepsize factor | 135 // Stepsize factor |
136 ef[0][i] *= mu; | 136 ef[0][i] *= mu; |
137 ef[1][i] *= mu; | 137 ef[1][i] *= mu; |
138 } | 138 } |
139 } | 139 } |
140 } | 140 } |
141 | 141 |
142 static void FilterAdaptationSSE2( | 142 static void FilterAdaptationSSE2( |
| 143 const OouraFft& ooura_fft, |
143 int num_partitions, | 144 int num_partitions, |
144 int x_fft_buf_block_pos, | 145 int x_fft_buf_block_pos, |
145 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], | 146 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], |
146 float e_fft[2][PART_LEN1], | 147 float e_fft[2][PART_LEN1], |
147 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { | 148 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { |
148 float fft[PART_LEN2]; | 149 float fft[PART_LEN2]; |
149 int i, j; | 150 int i, j; |
150 for (i = 0; i < num_partitions; i++) { | 151 for (i = 0; i < num_partitions; i++) { |
151 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); | 152 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); |
152 int pos = i * PART_LEN1; | 153 int pos = i * PART_LEN1; |
(...skipping 23 matching lines...) Expand all Loading... |
176 const __m128 h = _mm_unpackhi_ps(e, f); | 177 const __m128 h = _mm_unpackhi_ps(e, f); |
177 // Store | 178 // Store |
178 _mm_storeu_ps(&fft[2 * j + 0], g); | 179 _mm_storeu_ps(&fft[2 * j + 0], g); |
179 _mm_storeu_ps(&fft[2 * j + 4], h); | 180 _mm_storeu_ps(&fft[2 * j + 4], h); |
180 } | 181 } |
181 // ... and fixup the first imaginary entry. | 182 // ... and fixup the first imaginary entry. |
182 fft[1] = | 183 fft[1] = |
183 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], | 184 MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], |
184 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); | 185 e_fft[0][PART_LEN], e_fft[1][PART_LEN]); |
185 | 186 |
186 aec_rdft_inverse_128(fft); | 187 ooura_fft.InverseFft(fft); |
187 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); | 188 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); |
188 | 189 |
189 // fft scaling | 190 // fft scaling |
190 { | 191 { |
191 float scale = 2.0f / PART_LEN2; | 192 float scale = 2.0f / PART_LEN2; |
192 const __m128 scale_ps = _mm_load_ps1(&scale); | 193 const __m128 scale_ps = _mm_load_ps1(&scale); |
193 for (j = 0; j < PART_LEN; j += 4) { | 194 for (j = 0; j < PART_LEN; j += 4) { |
194 const __m128 fft_ps = _mm_loadu_ps(&fft[j]); | 195 const __m128 fft_ps = _mm_loadu_ps(&fft[j]); |
195 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); | 196 const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); |
196 _mm_storeu_ps(&fft[j], fft_scale); | 197 _mm_storeu_ps(&fft[j], fft_scale); |
197 } | 198 } |
198 } | 199 } |
199 aec_rdft_forward_128(fft); | 200 ooura_fft.Fft(fft); |
200 | 201 |
201 { | 202 { |
202 float wt1 = h_fft_buf[1][pos]; | 203 float wt1 = h_fft_buf[1][pos]; |
203 h_fft_buf[0][pos + PART_LEN] += fft[1]; | 204 h_fft_buf[0][pos + PART_LEN] += fft[1]; |
204 for (j = 0; j < PART_LEN; j += 4) { | 205 for (j = 0; j < PART_LEN; j += 4) { |
205 __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); | 206 __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); |
206 __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); | 207 __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); |
207 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); | 208 const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); |
208 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); | 209 const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); |
209 const __m128 fft_re = | 210 const __m128 fft_re = |
(...skipping 531 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
741 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 742 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
742 WebRtcAec_Overdrive = OverdriveSSE2; | 743 WebRtcAec_Overdrive = OverdriveSSE2; |
743 WebRtcAec_Suppress = SuppressSSE2; | 744 WebRtcAec_Suppress = SuppressSSE2; |
744 WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; | 745 WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; |
745 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; | 746 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; |
746 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 747 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |
747 WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 748 WebRtcAec_PartitionDelay = PartitionDelaySSE2; |
748 WebRtcAec_WindowData = WindowDataSSE2; | 749 WebRtcAec_WindowData = WindowDataSSE2; |
749 } | 750 } |
750 } // namespace webrtc | 751 } // namespace webrtc |
OLD | NEW |