OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h" | |
12 | |
13 #include <assert.h> | |
14 | |
15 #include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" | |
16 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" | |
17 | |
// Window table used by WindowAndFFT() and InverseFFTAndWindow() below.
// It holds 65 rising int16_t values from 0 to 16384. The consumers in this
// file treat it as Q14 (window products are shifted right by 14, or by 14
// minus the input scaling) and read it both forwards from entry 0 and
// backwards from entry 64.
// The name indicates the rising half of a square-root Hanning window.
// ALIGN8_BEG / ALIGN8_END are defined outside this file; presumably they
// request 8-byte alignment - TODO confirm.
18 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { | |
19 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, | |
20 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, | |
21 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, | |
22 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, | |
23 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, | |
24 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, | |
25 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, | |
26 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 | |
27 }; | |
28 | |
// Noise-estimation constants. Neither one is referenced in the part of the
// file reviewed here; presumably both are consumed by ComfortNoise() - TODO
// confirm against its definition.
29 static const int16_t kNoiseEstQDomain = 15; | |
30 static const int16_t kNoiseEstIncCount = 5; | |
31 | |
// Scatter table for WindowAndFFT(): 64 pairs of byte offsets into the fft
// buffer. Pair i (entries 2*i and 2*i+1) gives the destinations of windowed
// samples i and i + 64 respectively. Every offset is a multiple of 4, so each
// sample is written to the first halfword of a 4-byte slot.
// The offsets equal 4 * (7-bit bit-reversal of the sample index), e.g.
// sample 1 -> 256, sample 65 -> 260, sample 63 -> 504; presumably this lets
// WebRtcSpl_ComplexFFT run on pre-permuted input - TODO confirm.
// NOTE(review): nothing in this view writes to the table, so it could
// probably be declared const - confirm there is no other writer.
32 static int16_t coefTable[] = { | |
33 0, 4, 256, 260, 128, 132, 384, 388, | |
34 64, 68, 320, 324, 192, 196, 448, 452, | |
35 32, 36, 288, 292, 160, 164, 416, 420, | |
36 96, 100, 352, 356, 224, 228, 480, 484, | |
37 16, 20, 272, 276, 144, 148, 400, 404, | |
38 80, 84, 336, 340, 208, 212, 464, 468, | |
39 48, 52, 304, 308, 176, 180, 432, 436, | |
40 112, 116, 368, 372, 240, 244, 496, 500, | |
41 8, 12, 264, 268, 136, 140, 392, 396, | |
42 72, 76, 328, 332, 200, 204, 456, 460, | |
43 40, 44, 296, 300, 168, 172, 424, 428, | |
44 104, 108, 360, 364, 232, 236, 488, 492, | |
45 24, 28, 280, 284, 152, 156, 408, 412, | |
46 88, 92, 344, 348, 216, 220, 472, 476, | |
47 56, 60, 312, 316, 184, 188, 440, 444, | |
48 120, 124, 376, 380, 248, 252, 504, 508 | |
49 }; | |
50 | |
// Scatter table for InverseFFTAndWindow(): 64 pairs of byte offsets into the
// fft buffer, one pair per input bin i. For pair (a, b) the function stores
// (re, -im) of bin i at fft + a and (re, im) of bin i at fft + b.
// For i >= 1, a equals 4 * (7-bit bit-reversal of i) and b equals
// 4 * (7-bit bit-reversal of 128 - i), e.g. bin 1 -> (256, 508).
// The first pair is (0, 512): offset 512 lies one 4-byte slot past the
// offsets 0..508 used by every other entry, so the fft buffer handed to
// InverseFFTAndWindow() must provide that extra slot.
// NOTE(review): nothing in this view writes to the table, so it could
// probably be declared const - confirm there is no other writer.
51 static int16_t coefTable_ifft[] = { | |
52 0, 512, 256, 508, 128, 252, 384, 380, | |
53 64, 124, 320, 444, 192, 188, 448, 316, | |
54 32, 60, 288, 476, 160, 220, 416, 348, | |
55 96, 92, 352, 412, 224, 156, 480, 284, | |
56 16, 28, 272, 492, 144, 236, 400, 364, | |
57 80, 108, 336, 428, 208, 172, 464, 300, | |
58 48, 44, 304, 460, 176, 204, 432, 332, | |
59 112, 76, 368, 396, 240, 140, 496, 268, | |
60 8, 12, 264, 500, 136, 244, 392, 372, | |
61 72, 116, 328, 436, 200, 180, 456, 308, | |
62 40, 52, 296, 468, 168, 212, 424, 340, | |
63 104, 84, 360, 404, 232, 148, 488, 276, | |
64 24, 20, 280, 484, 152, 228, 408, 356, | |
65 88, 100, 344, 420, 216, 164, 472, 292, | |
66 56, 36, 312, 452, 184, 196, 440, 324, | |
67 120, 68, 376, 388, 248, 132, 504, 260 | |
68 }; | |
69 | |
// Forward declaration only; the definition lies outside the part of the file
// reviewed here.
70 static void ComfortNoise(AecmCore* aecm, | |
71 const uint16_t* dfa, | |
72 ComplexInt16* out, | |
73 const int16_t* lambda); | |
74 | |
// Windows a block of time-domain samples, scatters it into |fft| in the order
// given by coefTable, runs the forward FFT and copies the conjugated result
// into |freq_signal|.
//
// aecm                 [in]  Not referenced by this implementation.
// fft                  [out] Work buffer of at least PART_LEN4 int16_t. It is
//                            zeroed first and is read back after the FFT call.
// time_signal          [in]  Input samples; entries 0..127 are read (the loop
//                            count of 64 and the 128-byte offset are
//                            hard-coded in the assembly).
// freq_signal          [out] Receives 128 halfwords, i.e. 64 complex values
//                            (assumes ComplexInt16 is a {real, imag} pair of
//                            int16_t - TODO confirm).
// time_signal_scaling  [in]  Each windowed product is shifted by
//                            (time_signal_scaling - 14): left when that is
//                            >= 0, arithmetic right otherwise.
//
// Both assembly blocks use ".set noreorder"; the instruction written right
// after each branch is its delay slot and executes whether or not the branch
// is taken.
75 static void WindowAndFFT(AecmCore* aecm, | |
76 int16_t* fft, | |
77 const int16_t* time_signal, | |
78 ComplexInt16* freq_signal, | |
79 int time_signal_scaling) { | |
80 int i, j; | |
81 int32_t tmp1, tmp2, tmp3, tmp4; | |
82 int16_t* pfrfi; | |
83 ComplexInt16* pfreq_signal; | |
84 int16_t f_coef, s_coef; | |
// NOTE(review): the variables below hold addresses in 32-bit integers, so
// this code is only valid where pointers are 32 bits wide.
85 int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; | |
86 int32_t hann, hann1, coefs; | |
87 | |
88 memset(fft, 0, sizeof(int16_t) * PART_LEN4); | |
89 | |
90 // FFT of signal | |
91 __asm __volatile ( | |
92 ".set push \n\t" | |
93 ".set noreorder \n\t" | |
// Setup: shift = scaling - 14, i = 64 iterations, hann walks the window
// upwards from entry 0 and hann1 walks it downwards from entry 64.
94 "addiu %[shift], %[time_signal_scaling], -14 \n\t" | |
95 "addiu %[i], $zero, 64 \n\t" | |
96 "addiu %[load_ptr], %[time_signal], 0 \n\t" | |
97 "addiu %[hann], %[hanning], 0 \n\t" | |
98 "addiu %[hann1], %[hanning], 128 \n\t" | |
99 "addiu %[coefs], %[coefTable], 0 \n\t" | |
// Negative shift -> loop 2; shift1 = -shift is computed in the delay slot.
100 "bltz %[shift], 2f \n\t" | |
101 " negu %[shift1], %[shift] \n\t" | |
// Loop 1 (shift >= 0): window samples i and i + 64, shift left, and store
// each result at the byte offset read from coefTable.
102 "1: \n\t" | |
103 "lh %[tmp1], 0(%[load_ptr]) \n\t" | |
104 "lh %[tmp2], 0(%[hann]) \n\t" | |
105 "lh %[tmp3], 128(%[load_ptr]) \n\t" | |
106 "lh %[tmp4], 0(%[hann1]) \n\t" | |
107 "addiu %[i], %[i], -1 \n\t" | |
108 "mul %[tmp1], %[tmp1], %[tmp2] \n\t" | |
109 "mul %[tmp3], %[tmp3], %[tmp4] \n\t" | |
110 "lh %[f_coef], 0(%[coefs]) \n\t" | |
111 "lh %[s_coef], 2(%[coefs]) \n\t" | |
112 "addiu %[load_ptr], %[load_ptr], 2 \n\t" | |
113 "addiu %[hann], %[hann], 2 \n\t" | |
114 "addiu %[hann1], %[hann1], -2 \n\t" | |
115 "addu %[store_ptr1], %[fft], %[f_coef] \n\t" | |
116 "addu %[store_ptr2], %[fft], %[s_coef] \n\t" | |
117 "sllv %[tmp1], %[tmp1], %[shift] \n\t" | |
118 "sllv %[tmp3], %[tmp3], %[shift] \n\t" | |
119 "sh %[tmp1], 0(%[store_ptr1]) \n\t" | |
120 "sh %[tmp3], 0(%[store_ptr2]) \n\t" | |
121 "bgtz %[i], 1b \n\t" | |
122 " addiu %[coefs], %[coefs], 4 \n\t" | |
123 "b 3f \n\t" | |
124 " nop \n\t" | |
// Loop 2 (shift < 0): same as loop 1 but scales with an arithmetic right
// shift by shift1.
125 "2: \n\t" | |
126 "lh %[tmp1], 0(%[load_ptr]) \n\t" | |
127 "lh %[tmp2], 0(%[hann]) \n\t" | |
128 "lh %[tmp3], 128(%[load_ptr]) \n\t" | |
129 "lh %[tmp4], 0(%[hann1]) \n\t" | |
130 "addiu %[i], %[i], -1 \n\t" | |
131 "mul %[tmp1], %[tmp1], %[tmp2] \n\t" | |
132 "mul %[tmp3], %[tmp3], %[tmp4] \n\t" | |
133 "lh %[f_coef], 0(%[coefs]) \n\t" | |
134 "lh %[s_coef], 2(%[coefs]) \n\t" | |
135 "addiu %[load_ptr], %[load_ptr], 2 \n\t" | |
136 "addiu %[hann], %[hann], 2 \n\t" | |
137 "addiu %[hann1], %[hann1], -2 \n\t" | |
138 "addu %[store_ptr1], %[fft], %[f_coef] \n\t" | |
139 "addu %[store_ptr2], %[fft], %[s_coef] \n\t" | |
140 "srav %[tmp1], %[tmp1], %[shift1] \n\t" | |
141 "srav %[tmp3], %[tmp3], %[shift1] \n\t" | |
142 "sh %[tmp1], 0(%[store_ptr1]) \n\t" | |
143 "sh %[tmp3], 0(%[store_ptr2]) \n\t" | |
144 "bgtz %[i], 2b \n\t" | |
145 " addiu %[coefs], %[coefs], 4 \n\t" | |
146 "3: \n\t" | |
147 ".set pop \n\t" | |
148 : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann), | |
149 [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs), | |
150 [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), | |
151 [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef), | |
152 [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1), | |
153 [store_ptr2] "=&r" (store_ptr2) | |
154 : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable), | |
155 [time_signal_scaling] "r" (time_signal_scaling), | |
156 [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft) | |
157 : "memory", "hi", "lo" | |
158 ); | |
159 | |
160 WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); | |
161 pfrfi = fft; | |
162 pfreq_signal = freq_signal; | |
163 | |
// Copy 128 halfwords from fft to freq_signal, eight per pass, negating every
// odd-numbered halfword (the second member of each pair).
164 __asm __volatile ( | |
165 ".set push \n\t" | |
166 ".set noreorder \n\t" | |
167 "addiu %[j], $zero, 128 \n\t" | |
168 "1: \n\t" | |
169 "lh %[tmp1], 0(%[pfrfi]) \n\t" | |
170 "lh %[tmp2], 2(%[pfrfi]) \n\t" | |
171 "lh %[tmp3], 4(%[pfrfi]) \n\t" | |
172 "lh %[tmp4], 6(%[pfrfi]) \n\t" | |
173 "subu %[tmp2], $zero, %[tmp2] \n\t" | |
174 "sh %[tmp1], 0(%[pfreq_signal]) \n\t" | |
175 "sh %[tmp2], 2(%[pfreq_signal]) \n\t" | |
176 "subu %[tmp4], $zero, %[tmp4] \n\t" | |
177 "sh %[tmp3], 4(%[pfreq_signal]) \n\t" | |
178 "sh %[tmp4], 6(%[pfreq_signal]) \n\t" | |
179 "lh %[tmp1], 8(%[pfrfi]) \n\t" | |
180 "lh %[tmp2], 10(%[pfrfi]) \n\t" | |
181 "lh %[tmp3], 12(%[pfrfi]) \n\t" | |
182 "lh %[tmp4], 14(%[pfrfi]) \n\t" | |
183 "addiu %[j], %[j], -8 \n\t" | |
184 "subu %[tmp2], $zero, %[tmp2] \n\t" | |
185 "sh %[tmp1], 8(%[pfreq_signal]) \n\t" | |
186 "sh %[tmp2], 10(%[pfreq_signal]) \n\t" | |
187 "subu %[tmp4], $zero, %[tmp4] \n\t" | |
188 "sh %[tmp3], 12(%[pfreq_signal]) \n\t" | |
189 "sh %[tmp4], 14(%[pfreq_signal]) \n\t" | |
190 "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t" | |
191 "bgtz %[j], 1b \n\t" | |
192 " addiu %[pfrfi], %[pfrfi], 16 \n\t" | |
193 ".set pop \n\t" | |
194 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), | |
195 [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal), | |
196 [tmp4] "=&r" (tmp4) | |
197 : | |
198 : "memory" | |
199 ); | |
200 } | |
201 | |
// Rebuilds a full spectrum from |efw|, runs the inverse FFT, then windows the
// result and overlap-adds it with aecm->outBuf to produce |output|. Finally it
// moves the second half of the aecm time buffers into their first half.
//
// aecm          [in/out] outBuf is read and overwritten; dfaCleanQDomain is
//                        read; xBuf, dBufNoisy and (when nearendClean is not
//                        NULL) dBufClean have PART_LEN samples copied from
//                        offset PART_LEN to offset 0.
// fft           [out]    Work buffer. The first scatter pass writes a 4-byte
//                        slot at byte offset 512 (the first pair of
//                        coefTable_ifft is 0, 512), so the buffer must extend
//                        at least that far.
// efw           [in]     Spectrum; bins 0..63 are read by the assembly and
//                        bin PART_LEN by the C code that follows it.
// output        [out]    Receives 64 saturated int16_t samples.
// nearendClean  [in]     Only compared against NULL here.
//
// All assembly blocks use ".set noreorder"; the instruction written right
// after each branch is its delay slot and executes whether or not the branch
// is taken.
202 static void InverseFFTAndWindow(AecmCore* aecm, | |
203 int16_t* fft, | |
204 ComplexInt16* efw, | |
205 int16_t* output, | |
206 const int16_t* nearendClean) { | |
207 int i, outCFFT; | |
208 int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; | |
209 int16_t* pcoefTable_ifft = coefTable_ifft; | |
210 int16_t* pfft = fft; | |
211 int16_t* ppfft = fft; | |
212 ComplexInt16* pefw = efw; | |
213 int32_t out_aecm; | |
214 int16_t* paecm_buf = aecm->outBuf; | |
215 const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; | |
216 const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; | |
217 int16_t* output1 = output; | |
218 | |
// Scatter pass: 64 bins, four per loop pass. For each bin the offset pair
// (tmp1, tmp2) is read from coefTable_ifft; (re, im) is stored at fft + tmp2
// and (re, -im) at fft + tmp1.
219 __asm __volatile ( | |
220 ".set push \n\t" | |
221 ".set noreorder \n\t" | |
222 "addiu %[i], $zero, 64 \n\t" | |
223 "1: \n\t" | |
224 "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t" | |
225 "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t" | |
226 "lh %[tmp_re], 0(%[pefw]) \n\t" | |
227 "lh %[tmp_im], 2(%[pefw]) \n\t" | |
228 "addu %[pfft], %[fft], %[tmp2] \n\t" | |
229 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
230 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
231 "addu %[pfft], %[fft], %[tmp1] \n\t" | |
232 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
233 "subu %[tmp_im], $zero, %[tmp_im] \n\t" | |
234 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
235 "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t" | |
236 "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t" | |
237 "lh %[tmp_re], 4(%[pefw]) \n\t" | |
238 "lh %[tmp_im], 6(%[pefw]) \n\t" | |
239 "addu %[pfft], %[fft], %[tmp2] \n\t" | |
240 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
241 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
242 "addu %[pfft], %[fft], %[tmp1] \n\t" | |
243 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
244 "subu %[tmp_im], $zero, %[tmp_im] \n\t" | |
245 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
246 "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t" | |
247 "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t" | |
248 "lh %[tmp_re], 8(%[pefw]) \n\t" | |
249 "lh %[tmp_im], 10(%[pefw]) \n\t" | |
250 "addu %[pfft], %[fft], %[tmp2] \n\t" | |
251 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
252 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
253 "addu %[pfft], %[fft], %[tmp1] \n\t" | |
254 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
255 "subu %[tmp_im], $zero, %[tmp_im] \n\t" | |
256 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
257 "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t" | |
258 "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t" | |
259 "lh %[tmp_re], 12(%[pefw]) \n\t" | |
260 "lh %[tmp_im], 14(%[pefw]) \n\t" | |
261 "addu %[pfft], %[fft], %[tmp2] \n\t" | |
262 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
263 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
264 "addu %[pfft], %[fft], %[tmp1] \n\t" | |
265 "sh %[tmp_re], 0(%[pfft]) \n\t" | |
266 "subu %[tmp_im], $zero, %[tmp_im] \n\t" | |
267 "sh %[tmp_im], 2(%[pfft]) \n\t" | |
268 "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t" | |
269 "addiu %[i], %[i], -4 \n\t" | |
270 "bgtz %[i], 1b \n\t" | |
271 " addiu %[pefw], %[pefw], 16 \n\t" | |
272 ".set pop \n\t" | |
273 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), | |
274 [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im), | |
275 [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft), | |
276 [fft] "+r" (fft) | |
277 : | |
278 : "memory" | |
279 ); | |
280 | |
// Bin PART_LEN is placed by hand in the second 4-byte slot, with the
// imaginary part negated like the (re, -im) stores above.
281 fft[2] = efw[PART_LEN].real; | |
282 fft[3] = -efw[PART_LEN].imag; | |
283 | |
284 outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); | |
285 pfft = fft; | |
286 | |
// Compaction pass, in place: keep every other halfword (byte offsets 0, 4,
// 8, 12 of each 16-byte group) and pack them contiguously, 128 values in
// total. ppfft is the reader and pfft the writer; both start at fft.
287 __asm __volatile ( | |
288 ".set push \n\t" | |
289 ".set noreorder \n\t" | |
290 "addiu %[i], $zero, 128 \n\t" | |
291 "1: \n\t" | |
292 "lh %[tmp1], 0(%[ppfft]) \n\t" | |
293 "lh %[tmp2], 4(%[ppfft]) \n\t" | |
294 "lh %[tmp3], 8(%[ppfft]) \n\t" | |
295 "lh %[tmp4], 12(%[ppfft]) \n\t" | |
296 "addiu %[i], %[i], -4 \n\t" | |
297 "sh %[tmp1], 0(%[pfft]) \n\t" | |
298 "sh %[tmp2], 2(%[pfft]) \n\t" | |
299 "sh %[tmp3], 4(%[pfft]) \n\t" | |
300 "sh %[tmp4], 6(%[pfft]) \n\t" | |
301 "addiu %[ppfft], %[ppfft], 16 \n\t" | |
302 "bgtz %[i], 1b \n\t" | |
303 " addiu %[pfft], %[pfft], 8 \n\t" | |
304 ".set pop \n\t" | |
305 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), | |
306 [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), | |
307 [ppfft] "+r" (ppfft) | |
308 : | |
309 : "memory" | |
310 ); | |
311 | |
312 pfft = fft; | |
// Shift that is applied to every windowed sample below: left by out_aecm when
// it is >= 0, arithmetic right by -out_aecm otherwise.
313 out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); | |
314 | |
// Window / overlap-add pass: 64 samples, two per loop pass.
//  First half:  fft[k] * window[k], rounded (+8192) and shifted right by 14,
//               rescaled by out_aecm, added to aecm->outBuf[k], saturated to
//               16 bits and written to both fft[k] and output[k].
//  Second half: fft[64 + k] * window[64 - k], shifted right by 14 without
//               rounding, rescaled by out_aecm, saturated and written to
//               aecm->outBuf[k].
315 __asm __volatile ( | |
316 ".set push \n\t" | |
317 ".set noreorder \n\t" | |
318 "addiu %[i], $zero, 64 \n\t" | |
319 "11: \n\t" | |
320 "lh %[tmp1], 0(%[pfft]) \n\t" | |
321 "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t" | |
322 "addiu %[i], %[i], -2 \n\t" | |
323 "mul %[tmp1], %[tmp1], %[tmp2] \n\t" | |
324 "lh %[tmp3], 2(%[pfft]) \n\t" | |
325 "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t" | |
326 "mul %[tmp3], %[tmp3], %[tmp4] \n\t" | |
327 "addiu %[tmp1], %[tmp1], 8192 \n\t" | |
328 "sra %[tmp1], %[tmp1], 14 \n\t" | |
329 "addiu %[tmp3], %[tmp3], 8192 \n\t" | |
330 "sra %[tmp3], %[tmp3], 14 \n\t" | |
331 "bgez %[out_aecm], 1f \n\t" | |
332 " negu %[tmp2], %[out_aecm] \n\t" | |
333 "srav %[tmp1], %[tmp1], %[tmp2] \n\t" | |
334 "b 2f \n\t" | |
335 " srav %[tmp3], %[tmp3], %[tmp2] \n\t" | |
336 "1: \n\t" | |
337 "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" | |
338 "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" | |
339 "2: \n\t" | |
340 "lh %[tmp4], 0(%[paecm_buf]) \n\t" | |
341 "lh %[tmp2], 2(%[paecm_buf]) \n\t" | |
342 "addu %[tmp3], %[tmp3], %[tmp2] \n\t" | |
343 "addu %[tmp1], %[tmp1], %[tmp4] \n\t" | |
// Saturate tmp1 and tmp3 to the int16_t range. The DSP variant uses a
// saturating left shift by 16 followed by an arithmetic right shift by 16.
// The generic variant compares (x >> 31) with (x >> 15): they are equal
// exactly when x fits in 16 bits; otherwise x is replaced by 0x7fff XOR the
// sign mask, i.e. 0x7fff or 0xffff8000.
344 #if defined(MIPS_DSP_R1_LE) | |
345 "shll_s.w %[tmp1], %[tmp1], 16 \n\t" | |
346 "sra %[tmp1], %[tmp1], 16 \n\t" | |
347 "shll_s.w %[tmp3], %[tmp3], 16 \n\t" | |
348 "sra %[tmp3], %[tmp3], 16 \n\t" | |
349 #else // #if defined(MIPS_DSP_R1_LE) | |
350 "sra %[tmp4], %[tmp1], 31 \n\t" | |
351 "sra %[tmp2], %[tmp1], 15 \n\t" | |
352 "beq %[tmp4], %[tmp2], 3f \n\t" | |
353 " ori %[tmp2], $zero, 0x7fff \n\t" | |
354 "xor %[tmp1], %[tmp2], %[tmp4] \n\t" | |
355 "3: \n\t" | |
356 "sra %[tmp2], %[tmp3], 31 \n\t" | |
357 "sra %[tmp4], %[tmp3], 15 \n\t" | |
358 "beq %[tmp2], %[tmp4], 4f \n\t" | |
359 " ori %[tmp4], $zero, 0x7fff \n\t" | |
360 "xor %[tmp3], %[tmp4], %[tmp2] \n\t" | |
361 "4: \n\t" | |
362 #endif // #if defined(MIPS_DSP_R1_LE) | |
363 "sh %[tmp1], 0(%[pfft]) \n\t" | |
364 "sh %[tmp1], 0(%[output1]) \n\t" | |
365 "sh %[tmp3], 2(%[pfft]) \n\t" | |
366 "sh %[tmp3], 2(%[output1]) \n\t" | |
// Second half: byte offset 128 into fft, window read backwards through
// pp_kSqrtHanning (entries 0 and -2 bytes relative to the moving pointer).
367 "lh %[tmp1], 128(%[pfft]) \n\t" | |
368 "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t" | |
369 "mul %[tmp1], %[tmp1], %[tmp2] \n\t" | |
370 "lh %[tmp3], 130(%[pfft]) \n\t" | |
371 "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t" | |
372 "mul %[tmp3], %[tmp3], %[tmp4] \n\t" | |
373 "sra %[tmp1], %[tmp1], 14 \n\t" | |
374 "sra %[tmp3], %[tmp3], 14 \n\t" | |
375 "bgez %[out_aecm], 5f \n\t" | |
376 " negu %[tmp2], %[out_aecm] \n\t" | |
377 "srav %[tmp3], %[tmp3], %[tmp2] \n\t" | |
378 "b 6f \n\t" | |
379 " srav %[tmp1], %[tmp1], %[tmp2] \n\t" | |
380 "5: \n\t" | |
381 "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" | |
382 "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" | |
383 "6: \n\t" | |
384 #if defined(MIPS_DSP_R1_LE) | |
385 "shll_s.w %[tmp1], %[tmp1], 16 \n\t" | |
386 "sra %[tmp1], %[tmp1], 16 \n\t" | |
387 "shll_s.w %[tmp3], %[tmp3], 16 \n\t" | |
388 "sra %[tmp3], %[tmp3], 16 \n\t" | |
389 #else // #if defined(MIPS_DSP_R1_LE) | |
390 "sra %[tmp4], %[tmp1], 31 \n\t" | |
391 "sra %[tmp2], %[tmp1], 15 \n\t" | |
392 "beq %[tmp4], %[tmp2], 7f \n\t" | |
393 " ori %[tmp2], $zero, 0x7fff \n\t" | |
394 "xor %[tmp1], %[tmp2], %[tmp4] \n\t" | |
395 "7: \n\t" | |
396 "sra %[tmp2], %[tmp3], 31 \n\t" | |
397 "sra %[tmp4], %[tmp3], 15 \n\t" | |
398 "beq %[tmp2], %[tmp4], 8f \n\t" | |
399 " ori %[tmp4], $zero, 0x7fff \n\t" | |
400 "xor %[tmp3], %[tmp4], %[tmp2] \n\t" | |
401 "8: \n\t" | |
402 #endif // #if defined(MIPS_DSP_R1_LE) | |
403 "sh %[tmp1], 0(%[paecm_buf]) \n\t" | |
404 "sh %[tmp3], 2(%[paecm_buf]) \n\t" | |
405 "addiu %[output1], %[output1], 4 \n\t" | |
406 "addiu %[paecm_buf], %[paecm_buf], 4 \n\t" | |
407 "addiu %[pfft], %[pfft], 4 \n\t" | |
408 "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t" | |
409 "bgtz %[i], 11b \n\t" | |
410 " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t" | |
411 ".set pop \n\t" | |
412 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), | |
413 [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), | |
414 [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i), | |
415 [pp_kSqrtHanning] "+r" (pp_kSqrtHanning), | |
416 [p_kSqrtHanning] "+r" (p_kSqrtHanning) | |
// NOTE(review): the [WebRtcAecm_kSqrtHanning] input below is not referenced
// anywhere in the template above; it only occupies a register.
417 : [out_aecm] "r" (out_aecm), | |
418 [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning) | |
419 : "hi", "lo","memory" | |
420 ); | |
421 | |
422 // Copy the current block to the old position | |
423 // (aecm->outBuf is shifted elsewhere) | |
424 memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); | |
425 memcpy(aecm->dBufNoisy, | |
426 aecm->dBufNoisy + PART_LEN, | |
427 sizeof(int16_t) * PART_LEN); | |
428 if (nearendClean != NULL) { | |
429 memcpy(aecm->dBufClean, | |
430 aecm->dBufClean + PART_LEN, | |
431 sizeof(int16_t) * PART_LEN); | |
432 } | |
433 } | |
434 | |
// Computes the echo estimate for every bin and accumulates three energy sums
// on top of the values the caller passes in.
//
// aecm               [in]     channelStored and channelAdapt16 are read.
// far_spectrum       [in]     Entries 0..PART_LEN are read.
// echo_est           [out]    echo_est[k] = channelStored[k] * far_spectrum[k]
//                             for k = 0..PART_LEN.
// far_energy         [in/out] Incremented by the sum of far_spectrum[k].
// echo_energy_adapt  [in/out] Incremented by the sum of
//                             channelAdapt16[k] * far_spectrum[k].
// echo_energy_stored [in/out] Incremented by the sum of echo_est[k].
//
// The assembly loop handles bins 0..PART_LEN-1 four at a time; bin PART_LEN
// is handled by the C statements after the loop.
435 void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, | |
436 const uint16_t* far_spectrum, | |
437 int32_t* echo_est, | |
438 uint32_t* far_energy, | |
439 uint32_t* echo_energy_adapt, | |
440 uint32_t* echo_energy_stored) { | |
441 int i; | |
442 uint32_t par1 = (*far_energy); | |
443 uint32_t par2 = (*echo_energy_adapt); | |
444 uint32_t par3 = (*echo_energy_stored); | |
445 int16_t* ch_stored_p = &(aecm->channelStored[0]); | |
446 int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); | |
447 uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); | |
448 int32_t* echo_p = &(echo_est[0]); | |
449 int32_t temp0, stored0, echo0, adept0, spectrum0; | |
450 int32_t stored1, adept1, spectrum1, echo1, temp1; | |
451 | |
452 // Get energy for the delayed far end signal and estimated | |
453 // echo using both stored and adapted channels. | |
454 for (i = 0; i < PART_LEN; i+= 4) { | |
// Per pass: two groups of two bins. channelStored is loaded sign-extended
// (lh); channelAdapt16 and far_spectrum are loaded zero-extended (lhu).
// echo_p is advanced early, so the stores use negative offsets.
// NOTE(review): channelAdapt16 is zero-extended here but multiplied as a
// signed int16_t in the C tail below; the two agree only for non-negative
// values - confirm the channel can never be negative.
455 __asm __volatile ( | |
456 ".set push \n\t" | |
457 ".set noreorder \n\t" | |
458 "lh %[stored0], 0(%[ch_stored_p]) \n\t" | |
459 "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" | |
460 "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" | |
461 "lh %[stored1], 2(%[ch_stored_p]) \n\t" | |
462 "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" | |
463 "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" | |
464 "mul %[echo0], %[stored0], %[spectrum0] \n\t" | |
465 "mul %[temp0], %[adept0], %[spectrum0] \n\t" | |
466 "mul %[echo1], %[stored1], %[spectrum1] \n\t" | |
467 "mul %[temp1], %[adept1], %[spectrum1] \n\t" | |
468 "addu %[par1], %[par1], %[spectrum0] \n\t" | |
469 "addu %[par1], %[par1], %[spectrum1] \n\t" | |
470 "addiu %[echo_p], %[echo_p], 16 \n\t" | |
471 "addu %[par3], %[par3], %[echo0] \n\t" | |
472 "addu %[par2], %[par2], %[temp0] \n\t" | |
473 "addu %[par3], %[par3], %[echo1] \n\t" | |
474 "addu %[par2], %[par2], %[temp1] \n\t" | |
475 "usw %[echo0], -16(%[echo_p]) \n\t" | |
476 "usw %[echo1], -12(%[echo_p]) \n\t" | |
477 "lh %[stored0], 4(%[ch_stored_p]) \n\t" | |
478 "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" | |
479 "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" | |
480 "lh %[stored1], 6(%[ch_stored_p]) \n\t" | |
481 "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" | |
482 "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" | |
483 "mul %[echo0], %[stored0], %[spectrum0] \n\t" | |
484 "mul %[temp0], %[adept0], %[spectrum0] \n\t" | |
485 "mul %[echo1], %[stored1], %[spectrum1] \n\t" | |
486 "mul %[temp1], %[adept1], %[spectrum1] \n\t" | |
487 "addu %[par1], %[par1], %[spectrum0] \n\t" | |
488 "addu %[par1], %[par1], %[spectrum1] \n\t" | |
489 "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" | |
490 "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" | |
491 "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" | |
492 "addu %[par3], %[par3], %[echo0] \n\t" | |
493 "addu %[par2], %[par2], %[temp0] \n\t" | |
494 "addu %[par3], %[par3], %[echo1] \n\t" | |
495 "addu %[par2], %[par2], %[temp1] \n\t" | |
496 "usw %[echo0], -8(%[echo_p]) \n\t" | |
497 "usw %[echo1], -4(%[echo_p]) \n\t" | |
498 ".set pop \n\t" | |
499 : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0), | |
500 [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0), | |
501 [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3), | |
502 [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1), | |
503 [adept1] "=&r" (adept1), [echo1] "=&r" (echo1), | |
504 [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1), | |
505 [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p), | |
506 [spectrum_p] "+r" (spectrum_p) | |
507 : | |
508 : "hi", "lo", "memory" | |
509 ); | |
510 } | |
511 | |
// Tail: the last bin (index PART_LEN) is not covered by the unrolled loop.
512 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], | |
513 far_spectrum[PART_LEN]); | |
514 par1 += (uint32_t)(far_spectrum[PART_LEN]); | |
515 par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; | |
516 par3 += (uint32_t)echo_est[PART_LEN]; | |
517 | |
518 (*far_energy) = par1; | |
519 (*echo_energy_adapt) = par2; | |
520 (*echo_energy_stored) = par3; | |
521 } | |
522 | |
523 #if defined(MIPS_DSP_R1_LE) | |
// Copies the adaptive channel into the stored channel and recomputes the echo
// estimate from the stored channel. Compiled only when MIPS_DSP_R1_LE is
// defined.
//
// aecm          [in/out] channelAdapt16 is copied into channelStored
//                        (PART_LEN1 entries).
// far_spectrum  [in]     Entries 0..PART_LEN are read.
// echo_est      [out]    Entries 0..PART_LEN are overwritten with
//                        channelStored[k] * far_spectrum[k].
524 void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, | |
525 const uint16_t* far_spectrum, | |
526 int32_t* echo_est) { | |
527 int i; | |
528 int16_t* temp1; | |
529 uint16_t* temp8; | |
530 int32_t temp0, temp2, temp3, temp4, temp5, temp6; | |
531 int32_t* temp7 = &(echo_est[0]); | |
532 temp1 = &(aecm->channelStored[0]); | |
533 temp8 = (uint16_t*)(&far_spectrum[0]); | |
534 | |
535 // During startup we store the channel every block. | |
536 memcpy(aecm->channelStored, aecm->channelAdapt16, | |
537 sizeof(int16_t) * PART_LEN1); | |
538 // Recalculate echo estimate | |
539 for (i = 0; i < PART_LEN; i += 4) { | |
// Per pass: load two halfword pairs from each array with unaligned word
// loads, multiply the left and right halfwords separately, then shift each
// product right by 1 and store four words. Per the MIPS DSP ASE,
// muleq_s.w.phl/phr yield the halfword product shifted left by one bit
// (with saturation), which the following "sra ..., 1" undoes.
// The "phl" results go to the odd entries and the "phr" results to the even
// entries of each group of four.
// NOTE(review): the DSP multiply treats both halfwords as signed, while the
// tail below uses WEBRTC_SPL_MUL_16_U16 - confirm far_spectrum never exceeds
// 0x7fff.
540 __asm __volatile ( | |
541 "ulw %[temp0], 0(%[temp8]) \n\t" | |
542 "ulw %[temp2], 0(%[temp1]) \n\t" | |
543 "ulw %[temp4], 4(%[temp8]) \n\t" | |
544 "ulw %[temp5], 4(%[temp1]) \n\t" | |
545 "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" | |
546 "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" | |
547 "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" | |
548 "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" | |
549 "addiu %[temp7], %[temp7], 16 \n\t" | |
550 "addiu %[temp1], %[temp1], 8 \n\t" | |
551 "addiu %[temp8], %[temp8], 8 \n\t" | |
552 "sra %[temp3], %[temp3], 1 \n\t" | |
553 "sra %[temp0], %[temp0], 1 \n\t" | |
554 "sra %[temp6], %[temp6], 1 \n\t" | |
555 "sra %[temp4], %[temp4], 1 \n\t" | |
556 "usw %[temp3], -12(%[temp7]) \n\t" | |
557 "usw %[temp0], -16(%[temp7]) \n\t" | |
558 "usw %[temp6], -4(%[temp7]) \n\t" | |
559 "usw %[temp4], -8(%[temp7]) \n\t" | |
560 : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), | |
561 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), | |
562 [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7) | |
563 : | |
564 : "hi", "lo", "memory" | |
565 ); | |
566 } | |
// i equals PART_LEN after the loop, so this fills in the last bin.
567 echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], | |
568 far_spectrum[i]); | |
569 } | |
570 | |
// Overwrites the adaptive channel with the stored channel, in both its 16-bit
// (channelAdapt16) and 32-bit (channelAdapt32) forms. Compiled only when
// MIPS_DSP_R1_LE is defined.
//
// aecm  [in/out] channelStored is read; channelAdapt16 and channelAdapt32 are
//                overwritten (entries 0..PART_LEN).
571 void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { | |
572 int i; | |
573 int32_t* temp3; | |
574 int16_t* temp0; | |
575 int32_t temp1, temp2, temp4, temp5; | |
576 | |
577 temp0 = &(aecm->channelStored[0]); | |
578 temp3 = &(aecm->channelAdapt32[0]); | |
579 | |
580 // The stored channel has a significantly lower MSE than the adaptive one for | |
581 // two consecutive calculations. Reset the adaptive channel. | |
582 memcpy(aecm->channelAdapt16, | |
583 aecm->channelStored, | |
584 sizeof(int16_t) * PART_LEN1); | |
585 | |
586 // Restore the W32 channel | |
587 for (i = 0; i < PART_LEN; i += 4) { | |
// Per pass: load four stored halfwords as two words, expand each halfword to
// a 32-bit word and store the four words. Per the MIPS DSP ASE,
// preceq.w.phl/phr place the selected halfword in the upper 16 bits of the
// result, which matches the "<< 16" used for the last entry below.
588 __asm __volatile ( | |
589 "ulw %[temp1], 0(%[temp0]) \n\t" | |
590 "ulw %[temp4], 4(%[temp0]) \n\t" | |
591 "preceq.w.phl %[temp2], %[temp1] \n\t" | |
592 "preceq.w.phr %[temp1], %[temp1] \n\t" | |
593 "preceq.w.phl %[temp5], %[temp4] \n\t" | |
594 "preceq.w.phr %[temp4], %[temp4] \n\t" | |
595 "addiu %[temp0], %[temp0], 8 \n\t" | |
596 "usw %[temp2], 4(%[temp3]) \n\t" | |
597 "usw %[temp1], 0(%[temp3]) \n\t" | |
598 "usw %[temp5], 12(%[temp3]) \n\t" | |
599 "usw %[temp4], 8(%[temp3]) \n\t" | |
600 "addiu %[temp3], %[temp3], 16 \n\t" | |
601 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), | |
602 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), | |
603 [temp3] "+r" (temp3), [temp0] "+r" (temp0) | |
604 : | |
605 : "memory" | |
606 ); | |
607 } | |
608 | |
// i equals PART_LEN after the loop, so this fills in the last entry.
609 aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; | |
610 } | |
611 #endif // #if defined(MIPS_DSP_R1_LE) | |
612 | |
613 // Transforms a time domain signal into the frequency domain, outputting the | |
614 // complex valued signal, absolute value and sum of absolute values. | |
615 // | |
// aecm [in] Only forwarded to WindowAndFFT()
616 // time_signal [in] Pointer to time domain signal | |
617 // freq_signal [out] Pointer to the complex frequency domain array; | |
618 // bins 0..PART_LEN are written and the imaginary | |
619 // part of bins 0 and PART_LEN is forced to 0 | |
620 // freq_signal_abs [out] Pointer to absolute value of frequency domain | |
621 // array | |
622 // freq_signal_sum_abs [out] Pointer to the sum of all absolute values in | |
623 // the frequency domain array | |
624 // return value The Q-domain of current frequency values | |
// (time_signal_scaling; stays 0 unless AECM_DYNAMIC_Q is defined)
625 // | |
626 static int TimeToFrequencyDomain(AecmCore* aecm, | |
627 const int16_t* time_signal, | |
628 ComplexInt16* freq_signal, | |
629 uint16_t* freq_signal_abs, | |
630 uint32_t* freq_signal_sum_abs) { | |
631 int i = 0; | |
632 int time_signal_scaling = 0; | |
633 | |
634 // In fft_buf, +16 for 32-byte alignment. | |
635 int16_t fft_buf[PART_LEN4 + 16]; | |
636 int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); | |
637 | |
638 int16_t tmp16no1; | |
639 #if !defined(MIPS_DSP_R2_LE) | |
640 int32_t tmp32no1; | |
641 int32_t tmp32no2; | |
642 int16_t tmp16no2; | |
643 #else | |
644 int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; | |
645 int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; | |
646 int16_t* freqp; | |
647 uint16_t* freqabsp; | |
648 uint32_t freqt0, freqt1, freqt2, freqt3; | |
649 uint32_t freqs; | |
650 #endif | |
651 | |
// With dynamic Q enabled, the scaling is the normalisation headroom of the
// largest absolute sample over PART_LEN2 input samples.
652 #ifdef AECM_DYNAMIC_Q | |
653 tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); | |
654 time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); | |
655 #endif | |
656 | |
657 WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); | |
658 | |
659 // Extract imaginary and real part, | |
660 // calculate the magnitude for all frequency bins | |
661 freq_signal[0].imag = 0; | |
662 freq_signal[PART_LEN].imag = 0; | |
663 freq_signal[PART_LEN].real = fft[PART_LEN2]; | |
664 freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); | |
665 freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( | |
666 freq_signal[PART_LEN].real); | |
667 (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + | |
668 (uint32_t)(freq_signal_abs[PART_LEN]); | |
669 | |
// Generic path: bins 1..PART_LEN-1, one at a time.
670 #if !defined(MIPS_DSP_R2_LE) | |
671 for (i = 1; i < PART_LEN; i++) { | |
672 if (freq_signal[i].real == 0) | |
673 { | |
674 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( | |
675 freq_signal[i].imag); | |
676 } | |
677 else if (freq_signal[i].imag == 0) | |
678 { | |
679 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( | |
680 freq_signal[i].real); | |
681 } | |
682 else | |
683 { | |
684 // Magnitude of the complex fft output: | |
685 // magn = sqrt(real^2 + imag^2) | |
686 // computed with WebRtcSpl_SqrtFloor on the saturating sum of | |
687 // squares (no alpha/beta max-min approximation is used in | |
688 // this code path). | |
689 tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); | |
690 tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); | |
691 tmp32no1 = tmp16no1 * tmp16no1; | |
692 tmp32no2 = tmp16no2 * tmp16no2; | |
693 tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); | |
694 tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); | |
695 | |
696 freq_signal_abs[i] = (uint16_t)tmp32no1; | |
697 } | |
698 (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; | |
699 } | |
// DSP R2 path: bins 1..3 are handled first, then bins 4..PART_LEN-1 four at a
// time. The running sum is kept in freqs and written out at the end.
700 #else // #if !defined(MIPS_DSP_R2_LE) | |
701 freqs = (uint32_t)(freq_signal_abs[0]) + | |
702 (uint32_t)(freq_signal_abs[PART_LEN]); | |
703 freqp = &(freq_signal[1].real); | |
704 | |
// Each loaded word holds one bin as two packed halfwords. "mult $acN, $zero,
// $zero" clears an accumulator; per the MIPS DSP ASE, dpaq_s.w.ph then adds
// both halfword squares, each doubled, and "extr.w ..., 1" reads the
// accumulator shifted right by one, leaving real^2 + imag^2.
// NOTE(review): this block uses aligned "lw" while the loop below uses
// "ulw"; it relies on &freq_signal[1] being word aligned - confirm.
705 __asm __volatile ( | |
706 "lw %[freqt0], 0(%[freqp]) \n\t" | |
707 "lw %[freqt1], 4(%[freqp]) \n\t" | |
708 "lw %[freqt2], 8(%[freqp]) \n\t" | |
709 "mult $ac0, $zero, $zero \n\t" | |
710 "mult $ac1, $zero, $zero \n\t" | |
711 "mult $ac2, $zero, $zero \n\t" | |
712 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" | |
713 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" | |
714 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" | |
715 "addiu %[freqp], %[freqp], 12 \n\t" | |
716 "extr.w %[tmp32no20], $ac0, 1 \n\t" | |
717 "extr.w %[tmp32no21], $ac1, 1 \n\t" | |
718 "extr.w %[tmp32no22], $ac2, 1 \n\t" | |
719 : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), | |
720 [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp), | |
721 [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), | |
722 [tmp32no22] "=r" (tmp32no22) | |
723 : | |
724 : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo" | |
725 ); | |
726 | |
727 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); | |
728 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); | |
729 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); | |
730 freq_signal_abs[1] = (uint16_t)tmp32no10; | |
731 freq_signal_abs[2] = (uint16_t)tmp32no11; | |
732 freq_signal_abs[3] = (uint16_t)tmp32no12; | |
733 freqs += (uint32_t)tmp32no10; | |
734 freqs += (uint32_t)tmp32no11; | |
735 freqs += (uint32_t)tmp32no12; | |
736 freqabsp = &(freq_signal_abs[4]); | |
737 for (i = 4; i < PART_LEN; i+=4) | |
738 { | |
// Same sum-of-squares computation for four bins. freqabsp is advanced by
// four entries here, so the stores after the square roots use negative
// offsets.
739 __asm __volatile ( | |
740 "ulw %[freqt0], 0(%[freqp]) \n\t" | |
741 "ulw %[freqt1], 4(%[freqp]) \n\t" | |
742 "ulw %[freqt2], 8(%[freqp]) \n\t" | |
743 "ulw %[freqt3], 12(%[freqp]) \n\t" | |
744 "mult $ac0, $zero, $zero \n\t" | |
745 "mult $ac1, $zero, $zero \n\t" | |
746 "mult $ac2, $zero, $zero \n\t" | |
747 "mult $ac3, $zero, $zero \n\t" | |
748 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" | |
749 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" | |
750 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" | |
751 "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" | |
752 "addiu %[freqp], %[freqp], 16 \n\t" | |
753 "addiu %[freqabsp], %[freqabsp], 8 \n\t" | |
754 "extr.w %[tmp32no20], $ac0, 1 \n\t" | |
755 "extr.w %[tmp32no21], $ac1, 1 \n\t" | |
756 "extr.w %[tmp32no22], $ac2, 1 \n\t" | |
757 "extr.w %[tmp32no23], $ac3, 1 \n\t" | |
758 : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), | |
759 [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3), | |
760 [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), | |
761 [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23), | |
762 [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp) | |
763 : | |
764 : "memory", "hi", "lo", "$ac1hi", "$ac1lo", | |
765 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" | |
766 ); | |
767 | |
768 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); | |
769 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); | |
770 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); | |
771 tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); | |
772 | |
// Store the four magnitudes just behind freqabsp and add them to the running
// sum.
773 __asm __volatile ( | |
774 "sh %[tmp32no10], -8(%[freqabsp]) \n\t" | |
775 "sh %[tmp32no11], -6(%[freqabsp]) \n\t" | |
776 "sh %[tmp32no12], -4(%[freqabsp]) \n\t" | |
777 "sh %[tmp32no13], -2(%[freqabsp]) \n\t" | |
778 "addu %[freqs], %[freqs], %[tmp32no10] \n\t" | |
779 "addu %[freqs], %[freqs], %[tmp32no11] \n\t" | |
780 "addu %[freqs], %[freqs], %[tmp32no12] \n\t" | |
781 "addu %[freqs], %[freqs], %[tmp32no13] \n\t" | |
782 : [freqs] "+r" (freqs) | |
783 : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11), | |
784 [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13), | |
785 [freqabsp] "r" (freqabsp) | |
786 : "memory" | |
787 ); | |
788 } | |
789 | |
790 (*freq_signal_sum_abs) = freqs; | |
791 #endif | |
792 | |
793 return time_signal_scaling; | |
794 } | |
795 | |
796 int WebRtcAecm_ProcessBlock(AecmCore* aecm, | |
797 const int16_t* farend, | |
798 const int16_t* nearendNoisy, | |
799 const int16_t* nearendClean, | |
800 int16_t* output) { | |
801 int i; | |
802 uint32_t xfaSum; | |
803 uint32_t dfaNoisySum; | |
804 uint32_t dfaCleanSum; | |
805 uint32_t echoEst32Gained; | |
806 uint32_t tmpU32; | |
807 int32_t tmp32no1; | |
808 | |
809 uint16_t xfa[PART_LEN1]; | |
810 uint16_t dfaNoisy[PART_LEN1]; | |
811 uint16_t dfaClean[PART_LEN1]; | |
812 uint16_t* ptrDfaClean = dfaClean; | |
813 const uint16_t* far_spectrum_ptr = NULL; | |
814 | |
815 // 32 byte aligned buffers (with +8 or +16). | |
816 int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. | |
817 int32_t echoEst32_buf[PART_LEN1 + 8]; | |
818 int32_t dfw_buf[PART_LEN2 + 8]; | |
819 int32_t efw_buf[PART_LEN2 + 8]; | |
820 | |
821 int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); | |
822 int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); | |
823 ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); | |
824 ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); | |
825 | |
826 int16_t hnl[PART_LEN1]; | |
827 int16_t numPosCoef = 0; | |
828 int delay; | |
829 int16_t tmp16no1; | |
830 int16_t tmp16no2; | |
831 int16_t mu; | |
832 int16_t supGain; | |
833 int16_t zeros32, zeros16; | |
834 int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; | |
835 int far_q; | |
836 int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; | |
837 | |
838 const int kMinPrefBand = 4; | |
839 const int kMaxPrefBand = 24; | |
840 int32_t avgHnl32 = 0; | |
841 | |
842 int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; | |
843 int16_t* ptr; | |
844 int16_t* ptr1; | |
845 int16_t* er_ptr; | |
846 int16_t* dr_ptr; | |
847 | |
848 ptr = &hnl[0]; | |
849 ptr1 = &hnl[0]; | |
850 er_ptr = &efw[0].real; | |
851 dr_ptr = &dfw[0].real; | |
852 | |
853 // Determine startup state. There are three states: | |
854 // (0) the first CONV_LEN blocks | |
855 // (1) another CONV_LEN blocks | |
856 // (2) the rest | |
857 | |
858 if (aecm->startupState < 2) { | |
859 aecm->startupState = (aecm->totCount >= CONV_LEN) + | |
860 (aecm->totCount >= CONV_LEN2); | |
861 } | |
862 // END: Determine startup state | |
863 | |
864 // Buffer near and far end signals | |
865 memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); | |
866 memcpy(aecm->dBufNoisy + PART_LEN, | |
867 nearendNoisy, | |
868 sizeof(int16_t) * PART_LEN); | |
869 if (nearendClean != NULL) { | |
870 memcpy(aecm->dBufClean + PART_LEN, | |
871 nearendClean, | |
872 sizeof(int16_t) * PART_LEN); | |
873 } | |
874 | |
875 // Transform far end signal from time domain to frequency domain. | |
876 far_q = TimeToFrequencyDomain(aecm, | |
877 aecm->xBuf, | |
878 dfw, | |
879 xfa, | |
880 &xfaSum); | |
881 | |
882 // Transform noisy near end signal from time domain to frequency domain. | |
883 zerosDBufNoisy = TimeToFrequencyDomain(aecm, | |
884 aecm->dBufNoisy, | |
885 dfw, | |
886 dfaNoisy, | |
887 &dfaNoisySum); | |
888 aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; | |
889 aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; | |
890 | |
891 if (nearendClean == NULL) { | |
892 ptrDfaClean = dfaNoisy; | |
893 aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; | |
894 aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; | |
895 dfaCleanSum = dfaNoisySum; | |
896 } else { | |
897 // Transform clean near end signal from time domain to frequency domain. | |
898 zerosDBufClean = TimeToFrequencyDomain(aecm, | |
899 aecm->dBufClean, | |
900 dfw, | |
901 dfaClean, | |
902 &dfaCleanSum); | |
903 aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; | |
904 aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; | |
905 } | |
906 | |
907 // Get the delay | |
908 // Save far-end history and estimate delay | |
909 WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); | |
910 | |
911 if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, | |
912 far_q) == -1) { | |
913 return -1; | |
914 } | |
915 delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, | |
916 dfaNoisy, | |
917 PART_LEN1, | |
918 zerosDBufNoisy); | |
919 if (delay == -1) { | |
920 return -1; | |
921 } | |
922 else if (delay == -2) { | |
923 // If the delay is unknown, we assume zero. | |
924 // NOTE: this will have to be adjusted if we ever add lookahead. | |
925 delay = 0; | |
926 } | |
927 | |
928 if (aecm->fixedDelay >= 0) { | |
929 // Use fixed delay | |
930 delay = aecm->fixedDelay; | |
931 } | |
932 | |
933 // Get aligned far end spectrum | |
934 far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); | |
935 zerosXBuf = (int16_t) far_q; | |
936 | |
937 if (far_spectrum_ptr == NULL) { | |
938 return -1; | |
939 } | |
940 | |
941 // Calculate log(energy) and update energy threshold levels | |
942 WebRtcAecm_CalcEnergies(aecm, | |
943 far_spectrum_ptr, | |
944 zerosXBuf, | |
945 dfaNoisySum, | |
946 echoEst32); | |
947 // Calculate stepsize | |
948 mu = WebRtcAecm_CalcStepSize(aecm); | |
949 | |
950 // Update counters | |
951 aecm->totCount++; | |
952 | |
953 // This is the channel estimation algorithm. | |
954 // It is base on NLMS but has a variable step length, | |
955 // which was calculated above. | |
956 WebRtcAecm_UpdateChannel(aecm, | |
957 far_spectrum_ptr, | |
958 zerosXBuf, | |
959 dfaNoisy, | |
960 mu, | |
961 echoEst32); | |
962 | |
963 supGain = WebRtcAecm_CalcSuppressionGain(aecm); | |
964 | |
965 // Calculate Wiener filter hnl[] | |
966 for (i = 0; i < PART_LEN1; i++) { | |
967 // Far end signal through channel estimate in Q8 | |
968 // How much can we shift right to preserve resolution | |
969 tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; | |
970 aecm->echoFilt[i] += (tmp32no1 * 50) >> 8; | |
971 | |
972 zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; | |
973 zeros16 = WebRtcSpl_NormW16(supGain) + 1; | |
974 if (zeros32 + zeros16 > 16) { | |
975 // Multiplication is safe | |
976 // Result in | |
977 // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) | |
978 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], | |
979 (uint16_t)supGain); | |
980 resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; | |
981 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); | |
982 } else { | |
983 tmp16no1 = 17 - zeros32 - zeros16; | |
984 resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - | |
985 RESOLUTION_SUPGAIN; | |
986 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); | |
987 if (zeros32 > tmp16no1) { | |
988 echoEst32Gained = WEBRTC_SPL_UMUL_32_16( | |
989 (uint32_t)aecm->echoFilt[i], | |
990 supGain >> tmp16no1); | |
991 } else { | |
992 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) | |
993 echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain; | |
994 } | |
995 } | |
996 | |
997 zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); | |
998 assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative. | |
999 dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; | |
1000 if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { | |
1001 tmp16no1 = aecm->nearFilt[i] << zeros16; | |
1002 qDomainDiff = zeros16 - dfa_clean_q_domain_diff; | |
1003 tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; | |
1004 } else { | |
1005 tmp16no1 = dfa_clean_q_domain_diff < 0 | |
1006 ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff | |
1007 : aecm->nearFilt[i] << dfa_clean_q_domain_diff; | |
1008 qDomainDiff = 0; | |
1009 tmp16no2 = ptrDfaClean[i]; | |
1010 } | |
1011 | |
1012 tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); | |
1013 tmp16no2 = (int16_t)(tmp32no1 >> 4); | |
1014 tmp16no2 += tmp16no1; | |
1015 zeros16 = WebRtcSpl_NormW16(tmp16no2); | |
1016 if ((tmp16no2) & (-qDomainDiff > zeros16)) { | |
1017 aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; | |
1018 } else { | |
1019 aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff | |
1020 : tmp16no2 >> qDomainDiff; | |
1021 } | |
1022 | |
1023 // Wiener filter coefficients, resulting hnl in Q14 | |
1024 if (echoEst32Gained == 0) { | |
1025 hnl[i] = ONE_Q14; | |
1026 numPosCoef++; | |
1027 } else if (aecm->nearFilt[i] == 0) { | |
1028 hnl[i] = 0; | |
1029 } else { | |
1030 // Multiply the suppression gain | |
1031 // Rounding | |
1032 echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); | |
1033 tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, | |
1034 (uint16_t)aecm->nearFilt[i]); | |
1035 | |
1036 // Current resolution is | |
1037 // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN | |
1038 // - max(0, 17 - zeros16 - zeros32)) | |
1039 // Make sure we are in Q14 | |
1040 tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); | |
1041 if (tmp32no1 > ONE_Q14) { | |
1042 hnl[i] = 0; | |
1043 } else if (tmp32no1 < 0) { | |
1044 hnl[i] = ONE_Q14; | |
1045 numPosCoef++; | |
1046 } else { | |
1047 // 1-echoEst/dfa | |
1048 hnl[i] = ONE_Q14 - (int16_t)tmp32no1; | |
1049 if (hnl[i] <= 0) { | |
1050 hnl[i] = 0; | |
1051 } else { | |
1052 numPosCoef++; | |
1053 } | |
1054 } | |
1055 } | |
1056 } | |
1057 | |
1058 // Only in wideband. Prevent the gain in upper band from being larger than | |
1059 // in lower band. | |
1060 if (aecm->mult == 2) { | |
1061 // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause | |
1062 // speech distortion in double-talk. | |
1063 for (i = 0; i < (PART_LEN1 >> 3); i++) { | |
1064 __asm __volatile ( | |
1065 "lh %[temp1], 0(%[ptr1]) \n\t" | |
1066 "lh %[temp2], 2(%[ptr1]) \n\t" | |
1067 "lh %[temp3], 4(%[ptr1]) \n\t" | |
1068 "lh %[temp4], 6(%[ptr1]) \n\t" | |
1069 "lh %[temp5], 8(%[ptr1]) \n\t" | |
1070 "lh %[temp6], 10(%[ptr1]) \n\t" | |
1071 "lh %[temp7], 12(%[ptr1]) \n\t" | |
1072 "lh %[temp8], 14(%[ptr1]) \n\t" | |
1073 "mul %[temp1], %[temp1], %[temp1] \n\t" | |
1074 "mul %[temp2], %[temp2], %[temp2] \n\t" | |
1075 "mul %[temp3], %[temp3], %[temp3] \n\t" | |
1076 "mul %[temp4], %[temp4], %[temp4] \n\t" | |
1077 "mul %[temp5], %[temp5], %[temp5] \n\t" | |
1078 "mul %[temp6], %[temp6], %[temp6] \n\t" | |
1079 "mul %[temp7], %[temp7], %[temp7] \n\t" | |
1080 "mul %[temp8], %[temp8], %[temp8] \n\t" | |
1081 "sra %[temp1], %[temp1], 14 \n\t" | |
1082 "sra %[temp2], %[temp2], 14 \n\t" | |
1083 "sra %[temp3], %[temp3], 14 \n\t" | |
1084 "sra %[temp4], %[temp4], 14 \n\t" | |
1085 "sra %[temp5], %[temp5], 14 \n\t" | |
1086 "sra %[temp6], %[temp6], 14 \n\t" | |
1087 "sra %[temp7], %[temp7], 14 \n\t" | |
1088 "sra %[temp8], %[temp8], 14 \n\t" | |
1089 "sh %[temp1], 0(%[ptr1]) \n\t" | |
1090 "sh %[temp2], 2(%[ptr1]) \n\t" | |
1091 "sh %[temp3], 4(%[ptr1]) \n\t" | |
1092 "sh %[temp4], 6(%[ptr1]) \n\t" | |
1093 "sh %[temp5], 8(%[ptr1]) \n\t" | |
1094 "sh %[temp6], 10(%[ptr1]) \n\t" | |
1095 "sh %[temp7], 12(%[ptr1]) \n\t" | |
1096 "sh %[temp8], 14(%[ptr1]) \n\t" | |
1097 "addiu %[ptr1], %[ptr1], 16 \n\t" | |
1098 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), | |
1099 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), | |
1100 [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) | |
1101 : | |
1102 : "memory", "hi", "lo" | |
1103 ); | |
1104 } | |
1105 for(i = 0; i < (PART_LEN1 & 7); i++) { | |
1106 __asm __volatile ( | |
1107 "lh %[temp1], 0(%[ptr1]) \n\t" | |
1108 "mul %[temp1], %[temp1], %[temp1] \n\t" | |
1109 "sra %[temp1], %[temp1], 14 \n\t" | |
1110 "sh %[temp1], 0(%[ptr1]) \n\t" | |
1111 "addiu %[ptr1], %[ptr1], 2 \n\t" | |
1112 : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) | |
1113 : | |
1114 : "memory", "hi", "lo" | |
1115 ); | |
1116 } | |
1117 | |
1118 for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { | |
1119 avgHnl32 += (int32_t)hnl[i]; | |
1120 } | |
1121 | |
1122 assert(kMaxPrefBand - kMinPrefBand + 1 > 0); | |
1123 avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); | |
1124 | |
1125 for (i = kMaxPrefBand; i < PART_LEN1; i++) { | |
1126 if (hnl[i] > (int16_t)avgHnl32) { | |
1127 hnl[i] = (int16_t)avgHnl32; | |
1128 } | |
1129 } | |
1130 } | |
1131 | |
1132 // Calculate NLP gain, result is in Q14 | |
1133 if (aecm->nlpFlag) { | |
1134 if (numPosCoef < 3) { | |
1135 for (i = 0; i < PART_LEN1; i++) { | |
1136 efw[i].real = 0; | |
1137 efw[i].imag = 0; | |
1138 hnl[i] = 0; | |
1139 } | |
1140 } else { | |
1141 for (i = 0; i < PART_LEN1; i++) { | |
1142 #if defined(MIPS_DSP_R1_LE) | |
1143 __asm __volatile ( | |
1144 ".set push \n\t" | |
1145 ".set noreorder \n\t" | |
1146 "lh %[temp1], 0(%[ptr]) \n\t" | |
1147 "lh %[temp2], 0(%[dr_ptr]) \n\t" | |
1148 "slti %[temp4], %[temp1], 0x4001 \n\t" | |
1149 "beqz %[temp4], 3f \n\t" | |
1150 " lh %[temp3], 2(%[dr_ptr]) \n\t" | |
1151 "slti %[temp5], %[temp1], 3277 \n\t" | |
1152 "bnez %[temp5], 2f \n\t" | |
1153 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" | |
1154 "mul %[temp2], %[temp2], %[temp1] \n\t" | |
1155 "mul %[temp3], %[temp3], %[temp1] \n\t" | |
1156 "shra_r.w %[temp2], %[temp2], 14 \n\t" | |
1157 "shra_r.w %[temp3], %[temp3], 14 \n\t" | |
1158 "b 4f \n\t" | |
1159 " nop \n\t" | |
1160 "2: \n\t" | |
1161 "addu %[temp1], $zero, $zero \n\t" | |
1162 "addu %[temp2], $zero, $zero \n\t" | |
1163 "addu %[temp3], $zero, $zero \n\t" | |
1164 "b 1f \n\t" | |
1165 " nop \n\t" | |
1166 "3: \n\t" | |
1167 "addiu %[temp1], $0, 0x4000 \n\t" | |
1168 "1: \n\t" | |
1169 "sh %[temp1], 0(%[ptr]) \n\t" | |
1170 "4: \n\t" | |
1171 "sh %[temp2], 0(%[er_ptr]) \n\t" | |
1172 "sh %[temp3], 2(%[er_ptr]) \n\t" | |
1173 "addiu %[ptr], %[ptr], 2 \n\t" | |
1174 "addiu %[er_ptr], %[er_ptr], 4 \n\t" | |
1175 ".set pop \n\t" | |
1176 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), | |
1177 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), | |
1178 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) | |
1179 : | |
1180 : "memory", "hi", "lo" | |
1181 ); | |
1182 #else | |
1183 __asm __volatile ( | |
1184 ".set push \n\t" | |
1185 ".set noreorder \n\t" | |
1186 "lh %[temp1], 0(%[ptr]) \n\t" | |
1187 "lh %[temp2], 0(%[dr_ptr]) \n\t" | |
1188 "slti %[temp4], %[temp1], 0x4001 \n\t" | |
1189 "beqz %[temp4], 3f \n\t" | |
1190 " lh %[temp3], 2(%[dr_ptr]) \n\t" | |
1191 "slti %[temp5], %[temp1], 3277 \n\t" | |
1192 "bnez %[temp5], 2f \n\t" | |
1193 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" | |
1194 "mul %[temp2], %[temp2], %[temp1] \n\t" | |
1195 "mul %[temp3], %[temp3], %[temp1] \n\t" | |
1196 "addiu %[temp2], %[temp2], 0x2000 \n\t" | |
1197 "addiu %[temp3], %[temp3], 0x2000 \n\t" | |
1198 "sra %[temp2], %[temp2], 14 \n\t" | |
1199 "sra %[temp3], %[temp3], 14 \n\t" | |
1200 "b 4f \n\t" | |
1201 " nop \n\t" | |
1202 "2: \n\t" | |
1203 "addu %[temp1], $zero, $zero \n\t" | |
1204 "addu %[temp2], $zero, $zero \n\t" | |
1205 "addu %[temp3], $zero, $zero \n\t" | |
1206 "b 1f \n\t" | |
1207 " nop \n\t" | |
1208 "3: \n\t" | |
1209 "addiu %[temp1], $0, 0x4000 \n\t" | |
1210 "1: \n\t" | |
1211 "sh %[temp1], 0(%[ptr]) \n\t" | |
1212 "4: \n\t" | |
1213 "sh %[temp2], 0(%[er_ptr]) \n\t" | |
1214 "sh %[temp3], 2(%[er_ptr]) \n\t" | |
1215 "addiu %[ptr], %[ptr], 2 \n\t" | |
1216 "addiu %[er_ptr], %[er_ptr], 4 \n\t" | |
1217 ".set pop \n\t" | |
1218 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), | |
1219 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), | |
1220 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) | |
1221 : | |
1222 : "memory", "hi", "lo" | |
1223 ); | |
1224 #endif | |
1225 } | |
1226 } | |
1227 } | |
1228 else { | |
1229 // multiply with Wiener coefficients | |
1230 for (i = 0; i < PART_LEN1; i++) { | |
1231 efw[i].real = (int16_t) | |
1232 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, | |
1233 hnl[i], | |
1234 14)); | |
1235 efw[i].imag = (int16_t) | |
1236 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, | |
1237 hnl[i], | |
1238 14)); | |
1239 } | |
1240 } | |
1241 | |
1242 if (aecm->cngMode == AecmTrue) { | |
1243 ComfortNoise(aecm, ptrDfaClean, efw, hnl); | |
1244 } | |
1245 | |
1246 InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); | |
1247 | |
1248 return 0; | |
1249 } | |
1250 | |
1251 // Generate comfort noise and add to output signal. | |
1252 static void ComfortNoise(AecmCore* aecm, | |
1253 const uint16_t* dfa, | |
1254 ComplexInt16* out, | |
1255 const int16_t* lambda) { | |
1256 int16_t i; | |
1257 int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; | |
1258 int32_t tmp32, tmp321, tnoise, tnoise1; | |
1259 int32_t tmp322, tmp323, *tmp1; | |
1260 int16_t* dfap; | |
1261 int16_t* lambdap; | |
1262 const int32_t c2049 = 2049; | |
1263 const int32_t c359 = 359; | |
1264 const int32_t c114 = ONE_Q14; | |
1265 | |
1266 int16_t randW16[PART_LEN]; | |
1267 int16_t uReal[PART_LEN1]; | |
1268 int16_t uImag[PART_LEN1]; | |
1269 int32_t outLShift32; | |
1270 | |
1271 int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; | |
1272 int16_t minTrackShift = 9; | |
1273 | |
1274 assert(shiftFromNearToNoise >= 0); | |
1275 assert(shiftFromNearToNoise < 16); | |
1276 | |
1277 if (aecm->noiseEstCtr < 100) { | |
1278 // Track the minimum more quickly initially. | |
1279 aecm->noiseEstCtr++; | |
1280 minTrackShift = 6; | |
1281 } | |
1282 | |
1283 // Generate a uniform random array on [0 2^15-1]. | |
1284 WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); | |
1285 int16_t* randW16p = (int16_t*)randW16; | |
1286 #if defined (MIPS_DSP_R1_LE) | |
1287 int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; | |
1288 int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; | |
1289 #endif // #if defined(MIPS_DSP_R1_LE) | |
1290 tmp1 = (int32_t*)aecm->noiseEst + 1; | |
1291 dfap = (int16_t*)dfa + 1; | |
1292 lambdap = (int16_t*)lambda + 1; | |
1293 // Estimate noise power. | |
1294 for (i = 1; i < PART_LEN1; i+=2) { | |
1295 // Shift to the noise domain. | |
1296 __asm __volatile ( | |
1297 "lh %[tmp32], 0(%[dfap]) \n\t" | |
1298 "lw %[tnoise], 0(%[tmp1]) \n\t" | |
1299 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" | |
1300 : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), | |
1301 [tnoise] "=&r" (tnoise) | |
1302 : [tmp1] "r" (tmp1), [dfap] "r" (dfap), | |
1303 [shiftFromNearToNoise] "r" (shiftFromNearToNoise) | |
1304 : "memory" | |
1305 ); | |
1306 | |
1307 if (outLShift32 < tnoise) { | |
1308 // Reset "too low" counter | |
1309 aecm->noiseEstTooLowCtr[i] = 0; | |
1310 // Track the minimum. | |
1311 if (tnoise < (1 << minTrackShift)) { | |
1312 // For small values, decrease noiseEst[i] every | |
1313 // |kNoiseEstIncCount| block. The regular approach below can not | |
1314 // go further down due to truncation. | |
1315 aecm->noiseEstTooHighCtr[i]++; | |
1316 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { | |
1317 tnoise--; | |
1318 aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter | |
1319 } | |
1320 } else { | |
1321 __asm __volatile ( | |
1322 "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" | |
1323 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" | |
1324 "subu %[tnoise], %[tnoise], %[tmp32] \n\t" | |
1325 : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) | |
1326 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) | |
1327 ); | |
1328 } | |
1329 } else { | |
1330 // Reset "too high" counter | |
1331 aecm->noiseEstTooHighCtr[i] = 0; | |
1332 // Ramp slowly upwards until we hit the minimum again. | |
1333 if ((tnoise >> 19) <= 0) { | |
1334 if ((tnoise >> 11) > 0) { | |
1335 // Large enough for relative increase | |
1336 __asm __volatile ( | |
1337 "mul %[tnoise], %[tnoise], %[c2049] \n\t" | |
1338 "sra %[tnoise], %[tnoise], 11 \n\t" | |
1339 : [tnoise] "+r" (tnoise) | |
1340 : [c2049] "r" (c2049) | |
1341 : "hi", "lo" | |
1342 ); | |
1343 } else { | |
1344 // Make incremental increases based on size every | |
1345 // |kNoiseEstIncCount| block | |
1346 aecm->noiseEstTooLowCtr[i]++; | |
1347 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { | |
1348 __asm __volatile ( | |
1349 "sra %[tmp32], %[tnoise], 9 \n\t" | |
1350 "addi %[tnoise], %[tnoise], 1 \n\t" | |
1351 "addu %[tnoise], %[tnoise], %[tmp32] \n\t" | |
1352 : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) | |
1353 : | |
1354 ); | |
1355 aecm->noiseEstTooLowCtr[i] = 0; // Reset counter | |
1356 } | |
1357 } | |
1358 } else { | |
1359 // Avoid overflow. | |
1360 // Multiplication with 2049 will cause wrap around. Scale | |
1361 // down first and then multiply | |
1362 __asm __volatile ( | |
1363 "sra %[tnoise], %[tnoise], 11 \n\t" | |
1364 "mul %[tnoise], %[tnoise], %[c2049] \n\t" | |
1365 : [tnoise] "+r" (tnoise) | |
1366 : [c2049] "r" (c2049) | |
1367 : "hi", "lo" | |
1368 ); | |
1369 } | |
1370 } | |
1371 | |
1372 // Shift to the noise domain. | |
1373 __asm __volatile ( | |
1374 "lh %[tmp32], 2(%[dfap]) \n\t" | |
1375 "lw %[tnoise1], 4(%[tmp1]) \n\t" | |
1376 "addiu %[dfap], %[dfap], 4 \n\t" | |
1377 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" | |
1378 : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), | |
1379 [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) | |
1380 : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) | |
1381 : "memory" | |
1382 ); | |
1383 | |
1384 if (outLShift32 < tnoise1) { | |
1385 // Reset "too low" counter | |
1386 aecm->noiseEstTooLowCtr[i + 1] = 0; | |
1387 // Track the minimum. | |
1388 if (tnoise1 < (1 << minTrackShift)) { | |
1389 // For small values, decrease noiseEst[i] every | |
1390 // |kNoiseEstIncCount| block. The regular approach below can not | |
1391 // go further down due to truncation. | |
1392 aecm->noiseEstTooHighCtr[i + 1]++; | |
1393 if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { | |
1394 tnoise1--; | |
1395 aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter | |
1396 } | |
1397 } else { | |
1398 __asm __volatile ( | |
1399 "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" | |
1400 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" | |
1401 "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" | |
1402 : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) | |
1403 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) | |
1404 ); | |
1405 } | |
1406 } else { | |
1407 // Reset "too high" counter | |
1408 aecm->noiseEstTooHighCtr[i + 1] = 0; | |
1409 // Ramp slowly upwards until we hit the minimum again. | |
1410 if ((tnoise1 >> 19) <= 0) { | |
1411 if ((tnoise1 >> 11) > 0) { | |
1412 // Large enough for relative increase | |
1413 __asm __volatile ( | |
1414 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" | |
1415 "sra %[tnoise1], %[tnoise1], 11 \n\t" | |
1416 : [tnoise1] "+r" (tnoise1) | |
1417 : [c2049] "r" (c2049) | |
1418 : "hi", "lo" | |
1419 ); | |
1420 } else { | |
1421 // Make incremental increases based on size every | |
1422 // |kNoiseEstIncCount| block | |
1423 aecm->noiseEstTooLowCtr[i + 1]++; | |
1424 if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { | |
1425 __asm __volatile ( | |
1426 "sra %[tmp32], %[tnoise1], 9 \n\t" | |
1427 "addi %[tnoise1], %[tnoise1], 1 \n\t" | |
1428 "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" | |
1429 : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) | |
1430 : | |
1431 ); | |
1432 aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter | |
1433 } | |
1434 } | |
1435 } else { | |
1436 // Avoid overflow. | |
1437 // Multiplication with 2049 will cause wrap around. Scale | |
1438 // down first and then multiply | |
1439 __asm __volatile ( | |
1440 "sra %[tnoise1], %[tnoise1], 11 \n\t" | |
1441 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" | |
1442 : [tnoise1] "+r" (tnoise1) | |
1443 : [c2049] "r" (c2049) | |
1444 : "hi", "lo" | |
1445 ); | |
1446 } | |
1447 } | |
1448 | |
1449 __asm __volatile ( | |
1450 "lh %[tmp16], 0(%[lambdap]) \n\t" | |
1451 "lh %[tmp161], 2(%[lambdap]) \n\t" | |
1452 "sw %[tnoise], 0(%[tmp1]) \n\t" | |
1453 "sw %[tnoise1], 4(%[tmp1]) \n\t" | |
1454 "subu %[tmp16], %[c114], %[tmp16] \n\t" | |
1455 "subu %[tmp161], %[c114], %[tmp161] \n\t" | |
1456 "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" | |
1457 "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" | |
1458 "addiu %[lambdap], %[lambdap], 4 \n\t" | |
1459 "addiu %[tmp1], %[tmp1], 8 \n\t" | |
1460 : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), | |
1461 [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) | |
1462 : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), | |
1463 [shiftFromNearToNoise] "r" (shiftFromNearToNoise) | |
1464 : "memory" | |
1465 ); | |
1466 | |
1467 if (tmp32 > 32767) { | |
1468 tmp32 = 32767; | |
1469 aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; | |
1470 } | |
1471 if (tmp321 > 32767) { | |
1472 tmp321 = 32767; | |
1473 aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise; | |
1474 } | |
1475 | |
1476 __asm __volatile ( | |
1477 "mul %[tmp32], %[tmp32], %[tmp16] \n\t" | |
1478 "mul %[tmp321], %[tmp321], %[tmp161] \n\t" | |
1479 "sra %[nrsh1], %[tmp32], 14 \n\t" | |
1480 "sra %[nrsh2], %[tmp321], 14 \n\t" | |
1481 : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2) | |
1482 : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), | |
1483 [tmp321] "r" (tmp321) | |
1484 : "memory", "hi", "lo" | |
1485 ); | |
1486 | |
1487 __asm __volatile ( | |
1488 "lh %[tmp32], 0(%[randW16p]) \n\t" | |
1489 "lh %[tmp321], 2(%[randW16p]) \n\t" | |
1490 "addiu %[randW16p], %[randW16p], 4 \n\t" | |
1491 "mul %[tmp32], %[tmp32], %[c359] \n\t" | |
1492 "mul %[tmp321], %[tmp321], %[c359] \n\t" | |
1493 "sra %[tmp16], %[tmp32], 15 \n\t" | |
1494 "sra %[tmp161], %[tmp321], 15 \n\t" | |
1495 : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), | |
1496 [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) | |
1497 : [c359] "r" (c359) | |
1498 : "memory", "hi", "lo" | |
1499 ); | |
1500 | |
1501 #if !defined(MIPS_DSP_R1_LE) | |
1502 tmp32 = WebRtcAecm_kCosTable[tmp16]; | |
1503 tmp321 = WebRtcAecm_kSinTable[tmp16]; | |
1504 tmp322 = WebRtcAecm_kCosTable[tmp161]; | |
1505 tmp323 = WebRtcAecm_kSinTable[tmp161]; | |
1506 #else | |
1507 __asm __volatile ( | |
1508 "sll %[tmp16], %[tmp16], 1 \n\t" | |
1509 "sll %[tmp161], %[tmp161], 1 \n\t" | |
1510 "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" | |
1511 "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" | |
1512 "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" | |
1513 "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" | |
1514 : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), | |
1515 [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) | |
1516 : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), | |
1517 [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) | |
1518 : "memory" | |
1519 ); | |
1520 #endif | |
1521 __asm __volatile ( | |
1522 "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" | |
1523 "negu %[tmp162], %[nrsh1] \n\t" | |
1524 "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" | |
1525 "negu %[tmp163], %[nrsh2] \n\t" | |
1526 "sra %[tmp32], %[tmp32], 13 \n\t" | |
1527 "mul %[tmp321], %[tmp321], %[tmp162] \n\t" | |
1528 "sra %[tmp322], %[tmp322], 13 \n\t" | |
1529 "mul %[tmp323], %[tmp323], %[tmp163] \n\t" | |
1530 "sra %[tmp321], %[tmp321], 13 \n\t" | |
1531 "sra %[tmp323], %[tmp323], 13 \n\t" | |
1532 : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162), | |
1533 [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) | |
1534 : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) | |
1535 : "hi", "lo" | |
1536 ); | |
1537 // Tables are in Q13. | |
1538 uReal[i] = (int16_t)tmp32; | |
1539 uImag[i] = (int16_t)tmp321; | |
1540 uReal[i + 1] = (int16_t)tmp322; | |
1541 uImag[i + 1] = (int16_t)tmp323; | |
1542 } | |
1543 | |
1544 int32_t tt, sgn; | |
1545 tt = out[0].real; | |
1546 sgn = ((int)tt) >> 31; | |
1547 out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1548 tt = out[0].imag; | |
1549 sgn = ((int)tt) >> 31; | |
1550 out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1551 for (i = 1; i < PART_LEN; i++) { | |
1552 tt = out[i].real + uReal[i]; | |
1553 sgn = ((int)tt) >> 31; | |
1554 out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1555 tt = out[i].imag + uImag[i]; | |
1556 sgn = ((int)tt) >> 31; | |
1557 out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1558 } | |
1559 tt = out[PART_LEN].real + uReal[PART_LEN]; | |
1560 sgn = ((int)tt) >> 31; | |
1561 out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1562 tt = out[PART_LEN].imag; | |
1563 sgn = ((int)tt) >> 31; | |
1564 out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); | |
1565 } | |
1566 | |
OLD | NEW |