Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_mips.cc

Issue 1942853002: Removed the MIPS optimized code for the comfort noise generation in the AEC. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec2_CL
Patch Set: Rebase Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 /* 11 /*
12 * The core AEC algorithm, which is presented with time-aligned signals. 12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */ 13 */
14 14
15 #include "webrtc/modules/audio_processing/aec/aec_core.h" 15 #include "webrtc/modules/audio_processing/aec/aec_core.h"
16 16
17 #include <math.h> 17 #include <math.h>
18 18
19 extern "C" { 19 extern "C" {
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
21 } 21 }
22 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" 22 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
23 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 23 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
24 24
25 namespace webrtc { 25 namespace webrtc {
26 26
27 extern const float WebRtcAec_weightCurve[65]; 27 extern const float WebRtcAec_weightCurve[65];
28 extern const float WebRtcAec_overDriveCurve[65]; 28 extern const float WebRtcAec_overDriveCurve[65];
29 29
30 void WebRtcAec_ComfortNoise_mips(AecCore* aec,
31 float efw[2][PART_LEN1],
32 float comfortNoiseHband[2][PART_LEN1],
33 const float* noisePow,
34 const float* lambda) {
35 int i, num;
36 float rand[PART_LEN];
37 float noise, noiseAvg, tmp, tmpAvg;
38 int16_t randW16[PART_LEN];
39 complex_t u[PART_LEN1];
40
41 const float pi2 = 6.28318530717959f;
42 const float pi2t = pi2 / 32768;
43
44 // Generate a uniform random array on [0 1]
45 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
46
47 int16_t* randWptr = randW16;
48 float randTemp, randTemp2, randTemp3, randTemp4;
49 int32_t tmp1s, tmp2s, tmp3s, tmp4s;
50
51 for (i = 0; i < PART_LEN; i += 4) {
52 __asm __volatile(
53 ".set push \n\t"
54 ".set noreorder \n\t"
55 "lh %[tmp1s], 0(%[randWptr]) \n\t"
56 "lh %[tmp2s], 2(%[randWptr]) \n\t"
57 "lh %[tmp3s], 4(%[randWptr]) \n\t"
58 "lh %[tmp4s], 6(%[randWptr]) \n\t"
59 "mtc1 %[tmp1s], %[randTemp] \n\t"
60 "mtc1 %[tmp2s], %[randTemp2] \n\t"
61 "mtc1 %[tmp3s], %[randTemp3] \n\t"
62 "mtc1 %[tmp4s], %[randTemp4] \n\t"
63 "cvt.s.w %[randTemp], %[randTemp] \n\t"
64 "cvt.s.w %[randTemp2], %[randTemp2] \n\t"
65 "cvt.s.w %[randTemp3], %[randTemp3] \n\t"
66 "cvt.s.w %[randTemp4], %[randTemp4] \n\t"
67 "addiu %[randWptr], %[randWptr], 8 \n\t"
68 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
69 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
70 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
71 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
72 ".set pop \n\t"
73 : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
74 [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
75 [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
76 [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
77 [tmp4s] "=&r" (tmp4s)
78 : [pi2t] "f" (pi2t)
79 : "memory");
80
81 u[i + 1][0] = cosf(randTemp);
82 u[i + 1][1] = sinf(randTemp);
83 u[i + 2][0] = cosf(randTemp2);
84 u[i + 2][1] = sinf(randTemp2);
85 u[i + 3][0] = cosf(randTemp3);
86 u[i + 3][1] = sinf(randTemp3);
87 u[i + 4][0] = cosf(randTemp4);
88 u[i + 4][1] = sinf(randTemp4);
89 }
90
91 // Reject LF noise
92 float* u_ptr = &u[1][0];
93 float noise2, noise3, noise4;
94 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
95
96 u[0][0] = 0;
97 u[0][1] = 0;
98 for (i = 1; i < PART_LEN1; i += 4) {
99 __asm __volatile(
100 ".set push \n\t"
101 ".set noreorder \n\t"
102 "lwc1 %[noise], 4(%[noisePow]) \n\t"
103 "lwc1 %[noise2], 8(%[noisePow]) \n\t"
104 "lwc1 %[noise3], 12(%[noisePow]) \n\t"
105 "lwc1 %[noise4], 16(%[noisePow]) \n\t"
106 "sqrt.s %[noise], %[noise] \n\t"
107 "sqrt.s %[noise2], %[noise2] \n\t"
108 "sqrt.s %[noise3], %[noise3] \n\t"
109 "sqrt.s %[noise4], %[noise4] \n\t"
110 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
111 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
112 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
113 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
114 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
115 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
116 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
117 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
118 "addiu %[noisePow], %[noisePow], 16 \n\t"
119 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
120 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
121 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
122 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
123 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
124 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
125 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
126 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
127 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
128 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
129 "neg.s %[tmp2f] \n\t"
130 "neg.s %[tmp4f] \n\t"
131 "neg.s %[tmp6f] \n\t"
132 "neg.s %[tmp8f] \n\t"
133 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
134 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
135 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
136 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
137 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
138 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
139 "addiu %[u_ptr], %[u_ptr], 32 \n\t"
140 ".set pop \n\t"
141 : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
142 [noise] "=&f" (noise), [noise2] "=&f" (noise2),
143 [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
144 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
145 [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
146 [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
147 [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
148 :
149 : "memory");
150 }
151 u[PART_LEN][1] = 0;
152 noisePow -= PART_LEN;
153
154 u_ptr = &u[0][0];
155 float* u_ptr_end = &u[PART_LEN][0];
156 float* efw_ptr_0 = &efw[0][0];
157 float* efw_ptr_1 = &efw[1][0];
158 float tmp9f, tmp10f;
159 const float tmp1c = 1.0;
160
161 __asm __volatile(
162 ".set push \n\t"
163 ".set noreorder \n\t"
164 "1: \n\t"
165 "lwc1 %[tmp1f], 0(%[lambda]) \n\t"
166 "lwc1 %[tmp6f], 4(%[lambda]) \n\t"
167 "addiu %[lambda], %[lambda], 8 \n\t"
168 "c.lt.s %[tmp1f], %[tmp1c] \n\t"
169 "bc1f 4f \n\t"
170 " nop \n\t"
171 "c.lt.s %[tmp6f], %[tmp1c] \n\t"
172 "bc1f 3f \n\t"
173 " nop \n\t"
174 "2: \n\t"
175 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
176 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
177 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
178 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
179 "sqrt.s %[tmp1f], %[tmp1f] \n\t"
180 "sqrt.s %[tmp6f], %[tmp6f] \n\t"
181 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
182 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
183 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
184 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
185 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
186 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
187 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
188 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
189 #if !defined(MIPS32_R2_LE)
190 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
191 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
192 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
193 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
194 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
195 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
196 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
197 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
198 #else // #if !defined(MIPS32_R2_LE)
199 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
200 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
201 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
202 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
203 #endif // #if !defined(MIPS32_R2_LE)
204 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
205 "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
206 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
207 "b 5f \n\t"
208 " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
209 "3: \n\t"
210 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
211 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
212 "sqrt.s %[tmp1f], %[tmp1f] \n\t"
213 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
214 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
215 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
216 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
217 #if !defined(MIPS32_R2_LE)
218 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
219 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
220 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
221 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
222 #else // #if !defined(MIPS32_R2_LE)
223 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
224 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
225 #endif // #if !defined(MIPS32_R2_LE)
226 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
227 "b 5f \n\t"
228 " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
229 "4: \n\t"
230 "c.lt.s %[tmp6f], %[tmp1c] \n\t"
231 "bc1f 5f \n\t"
232 " nop \n\t"
233 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
234 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
235 "sqrt.s %[tmp6f], %[tmp6f] \n\t"
236 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
237 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
238 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
239 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
240 #if !defined(MIPS32_R2_LE)
241 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
242 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
243 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
244 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
245 #else // #if !defined(MIPS32_R2_LE)
246 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
247 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
248 #endif // #if !defined(MIPS32_R2_LE)
249 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
250 "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
251 "5: \n\t"
252 "addiu %[u_ptr], %[u_ptr], 16 \n\t"
253 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
254 "bne %[u_ptr], %[u_ptr_end], 1b \n\t"
255 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
256 ".set pop \n\t"
257 : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
258 [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
259 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
260 [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
261 [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
262 [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
263 : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
264 : "memory");
265
266 lambda -= PART_LEN;
267 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
268 // tmp = 1 - lambda[i];
269 efw[0][PART_LEN] += tmp * u[PART_LEN][0];
270 efw[1][PART_LEN] += tmp * u[PART_LEN][1];
271
272 // For H band comfort noise
273 // TODO(peah): don't compute noise and "tmp" twice. Use the previous results.
274 noiseAvg = 0.0;
275 tmpAvg = 0.0;
276 num = 0;
277 if (aec->num_bands > 1) {
278 for (i = 0; i < PART_LEN; i++) {
279 rand[i] = (static_cast<float>(randW16[i])) / 32768;
280 }
281
282 // average noise scale
283 // average over second half of freq spectrum (i.e., 4->8khz)
284 // TODO(peah): we shouldn't need num. We know how many elements we're
285 // summing.
286 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
287 num++;
288 noiseAvg += sqrtf(noisePow[i]);
289 }
290 noiseAvg /= static_cast<float>(num);
291
292 // average nlp scale
293 // average over second half of freq spectrum (i.e., 4->8khz)
294 // TODO(peah): we shouldn't need num. We know how many elements we're
295 // summing.
296 num = 0;
297 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
298 num++;
299 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
300 }
301 tmpAvg /= static_cast<float>(num);
302
303 // Use average noise for H band
304 // TODO(peah): we should probably have a new random vector here.
305 // Reject LF noise
306 u[0][0] = 0;
307 u[0][1] = 0;
308 for (i = 1; i < PART_LEN1; i++) {
309 tmp = pi2 * rand[i - 1];
310
311 // Use average noise for H band
312 u[i][0] = noiseAvg * static_cast<float>(cos(tmp));
313 u[i][1] = -noiseAvg * static_cast<float>(sin(tmp));
314 }
315 u[PART_LEN][1] = 0;
316
317 for (i = 0; i < PART_LEN1; i++) {
318 // Use average NLP weight for H band
319 comfortNoiseHband[0][i] = tmpAvg * u[i][0];
320 comfortNoiseHband[1][i] = tmpAvg * u[i][1];
321 }
322 } else {
323 memset(comfortNoiseHband, 0,
324 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
325 }
326 }
327
328 void WebRtcAec_FilterFar_mips( 30 void WebRtcAec_FilterFar_mips(
329 int num_partitions, 31 int num_partitions,
330 int x_fft_buf_block_pos, 32 int x_fft_buf_block_pos,
331 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 33 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
332 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 34 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
333 float y_fft[2][PART_LEN1]) { 35 float y_fft[2][PART_LEN1]) {
334 int i; 36 int i;
335 for (i = 0; i < num_partitions; i++) { 37 for (i = 0; i < num_partitions; i++) {
336 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; 38 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
337 int pos = i * PART_LEN1; 39 int pos = i * PART_LEN1;
(...skipping 428 matching lines...) Expand 10 before | Expand all | Expand 10 after
766 [len] "+r" (len) 468 [len] "+r" (len)
767 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), 469 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
768 [err_th] "f" (error_threshold) 470 [err_th] "f" (error_threshold)
769 : "memory"); 471 : "memory");
770 } 472 }
771 473
772 void WebRtcAec_InitAec_mips(void) { 474 void WebRtcAec_InitAec_mips(void) {
773 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; 475 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
774 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; 476 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
775 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; 477 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
776 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
777 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; 478 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
778 } 479 }
779 } // namespace webrtc 480 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698