webrtc/modules/audio_processing/aecm/aecm_core_mips.c - Issue 1857153002: Changed AECM to be built using C++

Side by Side Diff: webrtc/modules/audio_processing/aecm/aecm_core_mips.c

Issue 1857153002: Changed AECM to be built using C++ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Added external declaration to the delay estimator wrapper inclusion Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

3 *

4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.

9 */

10

11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h"

12

13 #include <assert.h>

14

15 #include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h"

16 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"

17

// Window coefficients shared by WindowAndFFT() and InverseFFTAndWindow()
// below: 65 int16_t values rising monotonically from 0 to 16384.
// NOTE(review): the name suggests a square-root Hanning window in Q14
// (16384 == 1.0) -- confirm. ALIGN8_BEG / ALIGN8_END are defined outside this
// file; presumably they request 8-byte alignment -- confirm.

18 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {

19 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,

20 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,

21 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,

22 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,

23 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,

24 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,

25 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,

26 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384

27 };

28

// NOTE(review): neither constant is referenced in the part of the file visible
// here; presumably they parameterize the noise estimate used by the comfort
// noise code further down -- confirm.

29 static const int16_t kNoiseEstQDomain = 15;

30 static const int16_t kNoiseEstIncCount = 5;

31

// Store offsets (in bytes, relative to the start of the fft work buffer) used
// by WindowAndFFT(). The table holds 128 entries that are consumed as 64
// pairs: for iteration k the first entry is where the windowed sample
// time_signal[k] is written and the second entry is where the windowed sample
// time_signal[k + 64] is written. All offsets are multiples of 4.
// NOTE(review): the ordering looks like a bit-reversal permutation that
// pre-shuffles the input for WebRtcSpl_ComplexFFT -- confirm.

32 static int16_t coefTable[] = {

33 0, 4, 256, 260, 128, 132, 384, 388,

34 64, 68, 320, 324, 192, 196, 448, 452,

35 32, 36, 288, 292, 160, 164, 416, 420,

36 96, 100, 352, 356, 224, 228, 480, 484,

37 16, 20, 272, 276, 144, 148, 400, 404,

38 80, 84, 336, 340, 208, 212, 464, 468,

39 48, 52, 304, 308, 176, 180, 432, 436,

40 112, 116, 368, 372, 240, 244, 496, 500,

41 8, 12, 264, 268, 136, 140, 392, 396,

42 72, 76, 328, 332, 200, 204, 456, 460,

43 40, 44, 296, 300, 168, 172, 424, 428,

44 104, 108, 360, 364, 232, 236, 488, 492,

45 24, 28, 280, 284, 152, 156, 408, 412,

46 88, 92, 344, 348, 216, 220, 472, 476,

47 56, 60, 312, 316, 184, 188, 440, 444,

48 120, 124, 376, 380, 248, 252, 504, 508

49 };

50

// Store offsets (in bytes, relative to the start of the fft work buffer) used
// by InverseFFTAndWindow(). The table holds 128 entries that are consumed as
// 64 pairs: for input bin k, efw[k] is written with its imaginary part negated
// at the first offset of the pair and written unchanged at the second offset.
// Note that the very first pair contains the offset 512, so the fft buffer
// handed to InverseFFTAndWindow() must be writable up to byte offset 515.
// NOTE(review): the ordering looks like a bit-reversal permutation that also
// builds the conjugate-symmetric spectrum for the inverse FFT -- confirm.

51 static int16_t coefTable_ifft[] = {

52 0, 512, 256, 508, 128, 252, 384, 380,

53 64, 124, 320, 444, 192, 188, 448, 316,

54 32, 60, 288, 476, 160, 220, 416, 348,

55 96, 92, 352, 412, 224, 156, 480, 284,

56 16, 28, 272, 492, 144, 236, 400, 364,

57 80, 108, 336, 428, 208, 172, 464, 300,

58 48, 44, 304, 460, 176, 204, 432, 332,

59 112, 76, 368, 396, 240, 140, 496, 268,

60 8, 12, 264, 500, 136, 244, 392, 372,

61 72, 116, 328, 436, 200, 180, 456, 308,

62 40, 52, 296, 468, 168, 212, 424, 340,

63 104, 84, 360, 404, 232, 148, 488, 276,

64 24, 20, 280, 484, 152, 228, 408, 356,

65 88, 100, 344, 420, 216, 164, 472, 292,

66 56, 36, 312, 452, 184, 196, 440, 324,

67 120, 68, 376, 388, 248, 132, 504, 260

68 };

69

// Forward declaration only; the definition is not within the part of the file
// visible here.

70 static void ComfortNoise(AecmCore* aecm,

71 const uint16_t* dfa,

72 ComplexInt16* out,

73 const int16_t* lambda);

74

// Windows a block of time-domain samples, runs the forward complex FFT and
// writes the complex conjugate of the first 64 FFT bins to freq_signal.
//
// aecm                 Not referenced by this function.
// fft                  Work buffer; the first PART_LEN4 int16_t values are
//                      cleared, then filled through the byte offsets in
//                      coefTable and transformed in place.
// time_signal          Input samples; elements [0, 127] are read.
// freq_signal          Output; 64 ComplexInt16 entries (128 halfwords) are
//                      written.
// time_signal_scaling  Scaling exponent; each windowed product is shifted by
//                      (time_signal_scaling - 14) bits.
//
// NOTE(review): the loop counts (64, 128) and byte offsets are hard-coded in
// the assembly, which implies PART_LEN == 64 -- confirm against aecm_core.h.

75 static void WindowAndFFT(AecmCore* aecm,

76 int16_t* fft,

77 const int16_t* time_signal,

78 ComplexInt16* freq_signal,

79 int time_signal_scaling) {

80 int i, j;

81 int32_t tmp1, tmp2, tmp3, tmp4;

82 int16_t* pfrfi;

83 ComplexInt16* pfreq_signal;

84 int16_t f_coef, s_coef;

85 int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;

86 int32_t hann, hann1, coefs;

87

// Only the slots addressed through coefTable are written below; everything
// else in the buffer must be zero before the FFT.

88 memset(fft, 0, sizeof(int16_t) * PART_LEN4);

89

90 // FFT of signal

// For k = 0..63: multiply time_signal[k] by WebRtcAecm_kSqrtHanning[k] and
// time_signal[k + 64] by WebRtcAecm_kSqrtHanning[64 - k] (hann1 starts 128
// bytes into the table and walks backwards), shift the products by
// shift = time_signal_scaling - 14 and store the low 16 bits at the two byte
// offsets read from coefTable. Loop "1" handles shift >= 0 with a left shift;
// loop "2" handles shift < 0 with an arithmetic right shift by -shift.
// With ".set noreorder" the instruction following each branch (written with a
// leading space) executes in the branch delay slot.

91 __asm __volatile (

92 ".set push \n\t"

93 ".set noreorder \n\t"

94 "addiu %[shift], %[time_signal_scaling], -14 \n\t"

95 "addiu %[i], $zero, 64 \n\t"

96 "addiu %[load_ptr], %[time_signal], 0 \n\t"

97 "addiu %[hann], %[hanning], 0 \n\t"

98 "addiu %[hann1], %[hanning], 128 \n\t"

99 "addiu %[coefs], %[coefTable], 0 \n\t"

100 "bltz %[shift], 2f \n\t"

101 " negu %[shift1], %[shift] \n\t"

102 "1: \n\t"

103 "lh %[tmp1], 0(%[load_ptr]) \n\t"

104 "lh %[tmp2], 0(%[hann]) \n\t"

105 "lh %[tmp3], 128(%[load_ptr]) \n\t"

106 "lh %[tmp4], 0(%[hann1]) \n\t"

107 "addiu %[i], %[i], -1 \n\t"

108 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

109 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

110 "lh %[f_coef], 0(%[coefs]) \n\t"

111 "lh %[s_coef], 2(%[coefs]) \n\t"

112 "addiu %[load_ptr], %[load_ptr], 2 \n\t"

113 "addiu %[hann], %[hann], 2 \n\t"

114 "addiu %[hann1], %[hann1], -2 \n\t"

115 "addu %[store_ptr1], %[fft], %[f_coef] \n\t"

116 "addu %[store_ptr2], %[fft], %[s_coef] \n\t"

117 "sllv %[tmp1], %[tmp1], %[shift] \n\t"

118 "sllv %[tmp3], %[tmp3], %[shift] \n\t"

119 "sh %[tmp1], 0(%[store_ptr1]) \n\t"

120 "sh %[tmp3], 0(%[store_ptr2]) \n\t"

121 "bgtz %[i], 1b \n\t"

122 " addiu %[coefs], %[coefs], 4 \n\t"

123 "b 3f \n\t"

124 " nop \n\t"

125 "2: \n\t"

126 "lh %[tmp1], 0(%[load_ptr]) \n\t"

127 "lh %[tmp2], 0(%[hann]) \n\t"

128 "lh %[tmp3], 128(%[load_ptr]) \n\t"

129 "lh %[tmp4], 0(%[hann1]) \n\t"

130 "addiu %[i], %[i], -1 \n\t"

131 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

132 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

133 "lh %[f_coef], 0(%[coefs]) \n\t"

134 "lh %[s_coef], 2(%[coefs]) \n\t"

135 "addiu %[load_ptr], %[load_ptr], 2 \n\t"

136 "addiu %[hann], %[hann], 2 \n\t"

137 "addiu %[hann1], %[hann1], -2 \n\t"

138 "addu %[store_ptr1], %[fft], %[f_coef] \n\t"

139 "addu %[store_ptr2], %[fft], %[s_coef] \n\t"

140 "srav %[tmp1], %[tmp1], %[shift1] \n\t"

141 "srav %[tmp3], %[tmp3], %[shift1] \n\t"

142 "sh %[tmp1], 0(%[store_ptr1]) \n\t"

143 "sh %[tmp3], 0(%[store_ptr2]) \n\t"

144 "bgtz %[i], 2b \n\t"

145 " addiu %[coefs], %[coefs], 4 \n\t"

146 "3: \n\t"

147 ".set pop \n\t"

148 : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),

149 [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),

150 [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),

151 [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),

152 [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),

153 [store_ptr2] "=&r" (store_ptr2)

154 : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),

155 [time_signal_scaling] "r" (time_signal_scaling),

156 [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)

157 : "memory", "hi", "lo"

158 );

159

160 WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);

161 pfrfi = fft;

162 pfreq_signal = freq_signal;

163

// Copy 128 halfwords (64 real/imag pairs) from fft to freq_signal, eight
// halfwords per iteration, negating every odd halfword (the imaginary parts),
// i.e. freq_signal receives the complex conjugate of the FFT output.

164 __asm __volatile (

165 ".set push \n\t"

166 ".set noreorder \n\t"

167 "addiu %[j], $zero, 128 \n\t"

168 "1: \n\t"

169 "lh %[tmp1], 0(%[pfrfi]) \n\t"

170 "lh %[tmp2], 2(%[pfrfi]) \n\t"

171 "lh %[tmp3], 4(%[pfrfi]) \n\t"

172 "lh %[tmp4], 6(%[pfrfi]) \n\t"

173 "subu %[tmp2], $zero, %[tmp2] \n\t"

174 "sh %[tmp1], 0(%[pfreq_signal]) \n\t"

175 "sh %[tmp2], 2(%[pfreq_signal]) \n\t"

176 "subu %[tmp4], $zero, %[tmp4] \n\t"

177 "sh %[tmp3], 4(%[pfreq_signal]) \n\t"

178 "sh %[tmp4], 6(%[pfreq_signal]) \n\t"

179 "lh %[tmp1], 8(%[pfrfi]) \n\t"

180 "lh %[tmp2], 10(%[pfrfi]) \n\t"

181 "lh %[tmp3], 12(%[pfrfi]) \n\t"

182 "lh %[tmp4], 14(%[pfrfi]) \n\t"

183 "addiu %[j], %[j], -8 \n\t"

184 "subu %[tmp2], $zero, %[tmp2] \n\t"

185 "sh %[tmp1], 8(%[pfreq_signal]) \n\t"

186 "sh %[tmp2], 10(%[pfreq_signal]) \n\t"

187 "subu %[tmp4], $zero, %[tmp4] \n\t"

188 "sh %[tmp3], 12(%[pfreq_signal]) \n\t"

189 "sh %[tmp4], 14(%[pfreq_signal]) \n\t"

190 "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t"

191 "bgtz %[j], 1b \n\t"

192 " addiu %[pfrfi], %[pfrfi], 16 \n\t"

193 ".set pop \n\t"

194 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),

195 [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),

196 [tmp4] "=&r" (tmp4)

197 :

198 : "memory"

199 );

200 }

201

// Converts the frequency-domain signal efw back to the time domain, applies
// the synthesis window, overlap-adds with aecm->outBuf and writes 64 output
// samples. Also shifts the second half of the near/far-end buffers in aecm to
// the first half for the next block.
//
// aecm          Instance state; outBuf, dfaCleanQDomain, xBuf, dBufNoisy and
//               dBufClean are accessed.
// fft           Work buffer, overwritten. It is written through the byte
//               offsets in coefTable_ifft (up to byte offset 515).
// efw           Input spectrum; entries [0, 63] and [PART_LEN] are read.
// output        Receives 64 int16_t samples.
// nearendClean  Only compared against NULL: when non-NULL, aecm->dBufClean is
//               shifted as well.
//
// NOTE(review): the loop counts (64, 128) and byte offsets are hard-coded in
// the assembly, which implies PART_LEN == 64 -- confirm against aecm_core.h.

202 static void InverseFFTAndWindow(AecmCore* aecm,

203 int16_t* fft,

204 ComplexInt16* efw,

205 int16_t* output,

206 const int16_t* nearendClean) {

207 int i, outCFFT;

208 int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;

209 int16_t* pcoefTable_ifft = coefTable_ifft;

210 int16_t* pfft = fft;

211 int16_t* ppfft = fft;

212 ComplexInt16* pefw = efw;

213 int32_t out_aecm;

214 int16_t* paecm_buf = aecm->outBuf;

215 const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;

216 const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];

217 int16_t* output1 = output;

218

// Scatter efw[0..63] into the fft buffer, four bins per iteration. For each
// bin a pair of byte offsets (tmp1, tmp2) is read from coefTable_ifft: the bin
// is stored unchanged at fft + tmp2 and with a negated imaginary part at
// fft + tmp1. The instruction after "bgtz" (leading space) runs in the branch
// delay slot because of ".set noreorder".

219 __asm __volatile (

220 ".set push \n\t"

221 ".set noreorder \n\t"

222 "addiu %[i], $zero, 64 \n\t"

223 "1: \n\t"

224 "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t"

225 "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t"

226 "lh %[tmp_re], 0(%[pefw]) \n\t"

227 "lh %[tmp_im], 2(%[pefw]) \n\t"

228 "addu %[pfft], %[fft], %[tmp2] \n\t"

229 "sh %[tmp_re], 0(%[pfft]) \n\t"

230 "sh %[tmp_im], 2(%[pfft]) \n\t"

231 "addu %[pfft], %[fft], %[tmp1] \n\t"

232 "sh %[tmp_re], 0(%[pfft]) \n\t"

233 "subu %[tmp_im], $zero, %[tmp_im] \n\t"

234 "sh %[tmp_im], 2(%[pfft]) \n\t"

235 "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t"

236 "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t"

237 "lh %[tmp_re], 4(%[pefw]) \n\t"

238 "lh %[tmp_im], 6(%[pefw]) \n\t"

239 "addu %[pfft], %[fft], %[tmp2] \n\t"

240 "sh %[tmp_re], 0(%[pfft]) \n\t"

241 "sh %[tmp_im], 2(%[pfft]) \n\t"

242 "addu %[pfft], %[fft], %[tmp1] \n\t"

243 "sh %[tmp_re], 0(%[pfft]) \n\t"

244 "subu %[tmp_im], $zero, %[tmp_im] \n\t"

245 "sh %[tmp_im], 2(%[pfft]) \n\t"

246 "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t"

247 "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t"

248 "lh %[tmp_re], 8(%[pefw]) \n\t"

249 "lh %[tmp_im], 10(%[pefw]) \n\t"

250 "addu %[pfft], %[fft], %[tmp2] \n\t"

251 "sh %[tmp_re], 0(%[pfft]) \n\t"

252 "sh %[tmp_im], 2(%[pfft]) \n\t"

253 "addu %[pfft], %[fft], %[tmp1] \n\t"

254 "sh %[tmp_re], 0(%[pfft]) \n\t"

255 "subu %[tmp_im], $zero, %[tmp_im] \n\t"

256 "sh %[tmp_im], 2(%[pfft]) \n\t"

257 "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t"

258 "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t"

259 "lh %[tmp_re], 12(%[pefw]) \n\t"

260 "lh %[tmp_im], 14(%[pefw]) \n\t"

261 "addu %[pfft], %[fft], %[tmp2] \n\t"

262 "sh %[tmp_re], 0(%[pfft]) \n\t"

263 "sh %[tmp_im], 2(%[pfft]) \n\t"

264 "addu %[pfft], %[fft], %[tmp1] \n\t"

265 "sh %[tmp_re], 0(%[pfft]) \n\t"

266 "subu %[tmp_im], $zero, %[tmp_im] \n\t"

267 "sh %[tmp_im], 2(%[pfft]) \n\t"

268 "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t"

269 "addiu %[i], %[i], -4 \n\t"

270 "bgtz %[i], 1b \n\t"

271 " addiu %[pefw], %[pefw], 16 \n\t"

272 ".set pop \n\t"

273 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

274 [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),

275 [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),

276 [fft] "+r" (fft)

277 :

278 : "memory"

279 );

280

// Bin PART_LEN is not covered by the loop above; it is placed at halfword
// positions 2 and 3 with its imaginary part negated.

281 fft[2] = efw[PART_LEN].real;

282 fft[3] = -efw[PART_LEN].imag;

283

284 outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);

285 pfft = fft;

286

// Compact the inverse FFT result in place: read every second halfword (byte
// offsets 0, 4, 8, 12, ... i.e. the real parts) and store them contiguously at
// the start of fft; 128 values in total, four per iteration.

287 __asm __volatile (

288 ".set push \n\t"

289 ".set noreorder \n\t"

290 "addiu %[i], $zero, 128 \n\t"

291 "1: \n\t"

292 "lh %[tmp1], 0(%[ppfft]) \n\t"

293 "lh %[tmp2], 4(%[ppfft]) \n\t"

294 "lh %[tmp3], 8(%[ppfft]) \n\t"

295 "lh %[tmp4], 12(%[ppfft]) \n\t"

296 "addiu %[i], %[i], -4 \n\t"

297 "sh %[tmp1], 0(%[pfft]) \n\t"

298 "sh %[tmp2], 2(%[pfft]) \n\t"

299 "sh %[tmp3], 4(%[pfft]) \n\t"

300 "sh %[tmp4], 6(%[pfft]) \n\t"

301 "addiu %[ppfft], %[ppfft], 16 \n\t"

302 "bgtz %[i], 1b \n\t"

303 " addiu %[pfft], %[pfft], 8 \n\t"

304 ".set pop \n\t"

305 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

306 [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),

307 [ppfft] "+r" (ppfft)

308 :

309 : "memory"

310 );

311

312 pfft = fft;

// Shift applied to the windowed samples below: left when >= 0, arithmetic
// right by the negated amount when < 0.

313 out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);

314

// Window and overlap-add, two samples per iteration (32 iterations, k = 0..63):
//  - first half:  fft[k] * window[k], rounded (+8192) and shifted right by 14,
//    shifted by out_aecm, added to aecm->outBuf[k], saturated to int16 and
//    stored to both fft[k] and output[k];
//  - second half: fft[64 + k] * window[64 - k], shifted right by 14 (without
//    rounding), shifted by out_aecm, saturated to int16 and stored to
//    aecm->outBuf[k] for the next block.
// Saturation uses shll_s.w when MIPS_DSP_R1_LE is defined; otherwise the
// value is compared against its own sign extension (x >> 31 vs. x >> 15) and
// replaced by 0x7fff ^ sign when they differ.
// The [WebRtcAecm_kSqrtHanning] input operand is never referenced by the
// assembly template.

315 __asm __volatile (

316 ".set push \n\t"

317 ".set noreorder \n\t"

318 "addiu %[i], $zero, 64 \n\t"

319 "11: \n\t"

320 "lh %[tmp1], 0(%[pfft]) \n\t"

321 "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t"

322 "addiu %[i], %[i], -2 \n\t"

323 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

324 "lh %[tmp3], 2(%[pfft]) \n\t"

325 "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t"

326 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

327 "addiu %[tmp1], %[tmp1], 8192 \n\t"

328 "sra %[tmp1], %[tmp1], 14 \n\t"

329 "addiu %[tmp3], %[tmp3], 8192 \n\t"

330 "sra %[tmp3], %[tmp3], 14 \n\t"

331 "bgez %[out_aecm], 1f \n\t"

332 " negu %[tmp2], %[out_aecm] \n\t"

333 "srav %[tmp1], %[tmp1], %[tmp2] \n\t"

334 "b 2f \n\t"

335 " srav %[tmp3], %[tmp3], %[tmp2] \n\t"

336 "1: \n\t"

337 "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"

338 "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"

339 "2: \n\t"

340 "lh %[tmp4], 0(%[paecm_buf]) \n\t"

341 "lh %[tmp2], 2(%[paecm_buf]) \n\t"

342 "addu %[tmp3], %[tmp3], %[tmp2] \n\t"

343 "addu %[tmp1], %[tmp1], %[tmp4] \n\t"

344 #if defined(MIPS_DSP_R1_LE)

345 "shll_s.w %[tmp1], %[tmp1], 16 \n\t"

346 "sra %[tmp1], %[tmp1], 16 \n\t"

347 "shll_s.w %[tmp3], %[tmp3], 16 \n\t"

348 "sra %[tmp3], %[tmp3], 16 \n\t"

349 #else // #if defined(MIPS_DSP_R1_LE)

350 "sra %[tmp4], %[tmp1], 31 \n\t"

351 "sra %[tmp2], %[tmp1], 15 \n\t"

352 "beq %[tmp4], %[tmp2], 3f \n\t"

353 " ori %[tmp2], $zero, 0x7fff \n\t"

354 "xor %[tmp1], %[tmp2], %[tmp4] \n\t"

355 "3: \n\t"

356 "sra %[tmp2], %[tmp3], 31 \n\t"

357 "sra %[tmp4], %[tmp3], 15 \n\t"

358 "beq %[tmp2], %[tmp4], 4f \n\t"

359 " ori %[tmp4], $zero, 0x7fff \n\t"

360 "xor %[tmp3], %[tmp4], %[tmp2] \n\t"

361 "4: \n\t"

362 #endif // #if defined(MIPS_DSP_R1_LE)

363 "sh %[tmp1], 0(%[pfft]) \n\t"

364 "sh %[tmp1], 0(%[output1]) \n\t"

365 "sh %[tmp3], 2(%[pfft]) \n\t"

366 "sh %[tmp3], 2(%[output1]) \n\t"

367 "lh %[tmp1], 128(%[pfft]) \n\t"

368 "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t"

369 "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

370 "lh %[tmp3], 130(%[pfft]) \n\t"

371 "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t"

372 "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

373 "sra %[tmp1], %[tmp1], 14 \n\t"

374 "sra %[tmp3], %[tmp3], 14 \n\t"

375 "bgez %[out_aecm], 5f \n\t"

376 " negu %[tmp2], %[out_aecm] \n\t"

377 "srav %[tmp3], %[tmp3], %[tmp2] \n\t"

378 "b 6f \n\t"

379 " srav %[tmp1], %[tmp1], %[tmp2] \n\t"

380 "5: \n\t"

381 "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"

382 "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"

383 "6: \n\t"

384 #if defined(MIPS_DSP_R1_LE)

385 "shll_s.w %[tmp1], %[tmp1], 16 \n\t"

386 "sra %[tmp1], %[tmp1], 16 \n\t"

387 "shll_s.w %[tmp3], %[tmp3], 16 \n\t"

388 "sra %[tmp3], %[tmp3], 16 \n\t"

389 #else // #if defined(MIPS_DSP_R1_LE)

390 "sra %[tmp4], %[tmp1], 31 \n\t"

391 "sra %[tmp2], %[tmp1], 15 \n\t"

392 "beq %[tmp4], %[tmp2], 7f \n\t"

393 " ori %[tmp2], $zero, 0x7fff \n\t"

394 "xor %[tmp1], %[tmp2], %[tmp4] \n\t"

395 "7: \n\t"

396 "sra %[tmp2], %[tmp3], 31 \n\t"

397 "sra %[tmp4], %[tmp3], 15 \n\t"

398 "beq %[tmp2], %[tmp4], 8f \n\t"

399 " ori %[tmp4], $zero, 0x7fff \n\t"

400 "xor %[tmp3], %[tmp4], %[tmp2] \n\t"

401 "8: \n\t"

402 #endif // #if defined(MIPS_DSP_R1_LE)

403 "sh %[tmp1], 0(%[paecm_buf]) \n\t"

404 "sh %[tmp3], 2(%[paecm_buf]) \n\t"

405 "addiu %[output1], %[output1], 4 \n\t"

406 "addiu %[paecm_buf], %[paecm_buf], 4 \n\t"

407 "addiu %[pfft], %[pfft], 4 \n\t"

408 "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t"

409 "bgtz %[i], 11b \n\t"

410 " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t"

411 ".set pop \n\t"

412 : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

413 [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),

414 [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),

415 [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),

416 [p_kSqrtHanning] "+r" (p_kSqrtHanning)

417 : [out_aecm] "r" (out_aecm),

418 [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)

419 : "hi", "lo","memory"

420 );

421

422 // Copy the current block to the old position

423 // (aecm->outBuf is shifted elsewhere)

424 memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);

425 memcpy(aecm->dBufNoisy,

426 aecm->dBufNoisy + PART_LEN,

427 sizeof(int16_t) * PART_LEN);

428 if (nearendClean != NULL) {

429 memcpy(aecm->dBufClean,

430 aecm->dBufClean + PART_LEN,

431 sizeof(int16_t) * PART_LEN);

432 }

433 }

434

// Computes the echo estimate for every frequency bin and accumulates three
// energy sums on top of the values already stored in the output parameters.
//
// aecm                Instance state; channelStored and channelAdapt16 are
//                     read.
// far_spectrum        Input spectrum, PART_LEN + 1 values.
// echo_est            Output, PART_LEN + 1 values:
//                     echo_est[i] = channelStored[i] * far_spectrum[i].
// far_energy          In/out: incremented by the sum of far_spectrum[i].
// echo_energy_adapt   In/out: incremented by the sum of
//                     channelAdapt16[i] * far_spectrum[i].
// echo_energy_stored  In/out: incremented by the sum of echo_est[i].
//
// Bins [0, PART_LEN) are handled four at a time in assembly; bin PART_LEN is
// handled in C after the loop.
// NOTE(review): inside the assembly channelAdapt16 is loaded with lhu
// (zero-extended) whereas channelStored is loaded with lh (sign-extended),
// while the C tail multiplies channelAdapt16[PART_LEN] as a signed int16_t --
// confirm this asymmetry is intended.

435 void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,

436 const uint16_t* far_spectrum,

437 int32_t* echo_est,

438 uint32_t* far_energy,

439 uint32_t* echo_energy_adapt,

440 uint32_t* echo_energy_stored) {

441 int i;

442 uint32_t par1 = (*far_energy);

443 uint32_t par2 = (*echo_energy_adapt);

444 uint32_t par3 = (*echo_energy_stored);

445 int16_t* ch_stored_p = &(aecm->channelStored[0]);

446 int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);

// The cast drops const; the assembly below only loads through spectrum_p.

447 uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));

448 int32_t* echo_p = &(echo_est[0]);

449 int32_t temp0, stored0, echo0, adept0, spectrum0;

450 int32_t stored1, adept1, spectrum1, echo1, temp1;

451

452 // Get energy for the delayed far end signal and estimated

453 // echo using both stored and adapted channels.

454 for (i = 0; i < PART_LEN; i+= 4) {

// One iteration processes four bins as two groups of two. echo_p is advanced
// by 16 bytes in the middle of the block, so the stores use negative offsets.
// The pointers and the three accumulators are "+r" operands and carry their
// values from one loop iteration to the next.

455 __asm __volatile (

456 ".set push \n\t"

457 ".set noreorder \n\t"

458 "lh %[stored0], 0(%[ch_stored_p]) \n\t"

459 "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"

460 "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"

461 "lh %[stored1], 2(%[ch_stored_p]) \n\t"

462 "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"

463 "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"

464 "mul %[echo0], %[stored0], %[spectrum0] \n\t"

465 "mul %[temp0], %[adept0], %[spectrum0] \n\t"

466 "mul %[echo1], %[stored1], %[spectrum1] \n\t"

467 "mul %[temp1], %[adept1], %[spectrum1] \n\t"

468 "addu %[par1], %[par1], %[spectrum0] \n\t"

469 "addu %[par1], %[par1], %[spectrum1] \n\t"

470 "addiu %[echo_p], %[echo_p], 16 \n\t"

471 "addu %[par3], %[par3], %[echo0] \n\t"

472 "addu %[par2], %[par2], %[temp0] \n\t"

473 "addu %[par3], %[par3], %[echo1] \n\t"

474 "addu %[par2], %[par2], %[temp1] \n\t"

475 "usw %[echo0], -16(%[echo_p]) \n\t"

476 "usw %[echo1], -12(%[echo_p]) \n\t"

477 "lh %[stored0], 4(%[ch_stored_p]) \n\t"

478 "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"

479 "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"

480 "lh %[stored1], 6(%[ch_stored_p]) \n\t"

481 "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"

482 "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"

483 "mul %[echo0], %[stored0], %[spectrum0] \n\t"

484 "mul %[temp0], %[adept0], %[spectrum0] \n\t"

485 "mul %[echo1], %[stored1], %[spectrum1] \n\t"

486 "mul %[temp1], %[adept1], %[spectrum1] \n\t"

487 "addu %[par1], %[par1], %[spectrum0] \n\t"

488 "addu %[par1], %[par1], %[spectrum1] \n\t"

489 "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"

490 "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"

491 "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"

492 "addu %[par3], %[par3], %[echo0] \n\t"

493 "addu %[par2], %[par2], %[temp0] \n\t"

494 "addu %[par3], %[par3], %[echo1] \n\t"

495 "addu %[par2], %[par2], %[temp1] \n\t"

496 "usw %[echo0], -8(%[echo_p]) \n\t"

497 "usw %[echo1], -4(%[echo_p]) \n\t"

498 ".set pop \n\t"

499 : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),

500 [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),

501 [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),

502 [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),

503 [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),

504 [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),

505 [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),

506 [spectrum_p] "+r" (spectrum_p)

507 :

508 : "hi", "lo", "memory"

509 );

510 }

511

// Last bin (index PART_LEN), not covered by the unrolled loop above.

512 echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],

513 far_spectrum[PART_LEN]);

514 par1 += (uint32_t)(far_spectrum[PART_LEN]);

515 par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];

516 par3 += (uint32_t)echo_est[PART_LEN];

517

518 (*far_energy) = par1;

519 (*echo_energy_adapt) = par2;

520 (*echo_energy_stored) = par3;

521 }

522

523 #if defined(MIPS_DSP_R1_LE)

// Copies the adaptive channel (channelAdapt16) into the stored channel
// (channelStored, PART_LEN1 values) and recomputes the echo estimate
// echo_est[i] from the stored channel and far_spectrum for every bin.
// Only compiled when MIPS_DSP_R1_LE is defined.
//
// aecm          Instance state; channelStored is overwritten.
// far_spectrum  Input spectrum, PART_LEN + 1 values.
// echo_est      Output, PART_LEN + 1 values.

524 void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,

525 const uint16_t* far_spectrum,

526 int32_t* echo_est) {

527 int i;

528 int16_t* temp1;

529 uint16_t* temp8;

530 int32_t temp0, temp2, temp3, temp4, temp5, temp6;

531 int32_t* temp7 = &(echo_est[0]);

532 temp1 = &(aecm->channelStored[0]);

// The cast drops const; the assembly below only loads through temp8.

533 temp8 = (uint16_t*)(&far_spectrum[0]);

534

535 // During startup we store the channel every block.

536 memcpy(aecm->channelStored, aecm->channelAdapt16,

537 sizeof(int16_t) * PART_LEN1);

538 // Recalculate echo estimate

539 for (i = 0; i < PART_LEN; i += 4) {

// Four bins per iteration: two 32-bit loads fetch four halfwords from each
// array, muleq_s.w.phl / .phr multiply the upper / lower halfword pairs, and
// each product is shifted right by one before being stored. temp7 is advanced
// before the stores, hence the negative offsets.
// NOTE(review): per the MIPS DSP ASE the muleq_s instructions are fractional
// multiplies that double the product (with saturation), and the "sra ..., 1"
// appears to undo that doubling; they also treat both halfwords as signed,
// whereas far_spectrum is declared uint16_t -- confirm.

540 __asm __volatile (

541 "ulw %[temp0], 0(%[temp8]) \n\t"

542 "ulw %[temp2], 0(%[temp1]) \n\t"

543 "ulw %[temp4], 4(%[temp8]) \n\t"

544 "ulw %[temp5], 4(%[temp1]) \n\t"

545 "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"

546 "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"

547 "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"

548 "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"

549 "addiu %[temp7], %[temp7], 16 \n\t"

550 "addiu %[temp1], %[temp1], 8 \n\t"

551 "addiu %[temp8], %[temp8], 8 \n\t"

552 "sra %[temp3], %[temp3], 1 \n\t"

553 "sra %[temp0], %[temp0], 1 \n\t"

554 "sra %[temp6], %[temp6], 1 \n\t"

555 "sra %[temp4], %[temp4], 1 \n\t"

556 "usw %[temp3], -12(%[temp7]) \n\t"

557 "usw %[temp0], -16(%[temp7]) \n\t"

558 "usw %[temp6], -4(%[temp7]) \n\t"

559 "usw %[temp4], -8(%[temp7]) \n\t"

560 : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

561 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),

562 [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)

563 :

564 : "hi", "lo", "memory"

565 );

566 }

// After the loop i == PART_LEN: handle the last bin in C.

567 echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],

568 far_spectrum[i]);

569 }

570

// Resets the adaptive channel from the stored channel: channelAdapt16 becomes
// a copy of channelStored (PART_LEN1 values) and channelAdapt32 is rebuilt
// from channelStored as well. Only compiled when MIPS_DSP_R1_LE is defined.
//
// aecm  Instance state; channelAdapt16 and channelAdapt32 are overwritten.

571 void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) {

572 int i;

573 int32_t* temp3;

574 int16_t* temp0;

575 int32_t temp1, temp2, temp4, temp5;

576

577 temp0 = &(aecm->channelStored[0]);

578 temp3 = &(aecm->channelAdapt32[0]);

579

580 // The stored channel has a significantly lower MSE than the adaptive one for

581 // two consecutive calculations. Reset the adaptive channel.

582 memcpy(aecm->channelAdapt16,

583 aecm->channelStored,

584 sizeof(int16_t) * PART_LEN1);

585

586 // Restore the W32 channel

587 for (i = 0; i < PART_LEN; i += 4) {

// Four values per iteration: two 32-bit loads fetch four halfwords of
// channelStored, preceq.w.phl / .phr expand the upper / lower halfword of each
// word to a 32-bit value, and the results are stored to channelAdapt32.
// NOTE(review): per the MIPS DSP ASE preceq.w.ph{l,r} place the halfword in
// the upper 16 bits of the result, matching the "<< 16" used for the last
// element below -- confirm.

588 __asm __volatile (

589 "ulw %[temp1], 0(%[temp0]) \n\t"

590 "ulw %[temp4], 4(%[temp0]) \n\t"

591 "preceq.w.phl %[temp2], %[temp1] \n\t"

592 "preceq.w.phr %[temp1], %[temp1] \n\t"

593 "preceq.w.phl %[temp5], %[temp4] \n\t"

594 "preceq.w.phr %[temp4], %[temp4] \n\t"

595 "addiu %[temp0], %[temp0], 8 \n\t"

596 "usw %[temp2], 4(%[temp3]) \n\t"

597 "usw %[temp1], 0(%[temp3]) \n\t"

598 "usw %[temp5], 12(%[temp3]) \n\t"

599 "usw %[temp4], 8(%[temp3]) \n\t"

600 "addiu %[temp3], %[temp3], 16 \n\t"

601 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),

602 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),

603 [temp3] "+r" (temp3), [temp0] "+r" (temp0)

604 :

605 : "memory"

606 );

607 }

608

// After the loop i == PART_LEN: handle the last element in C.

609 aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;

610 }

611 #endif // #if defined(MIPS_DSP_R1_LE)

612

613 // Transforms a time domain signal into the frequency domain, outputting the

614 // complex valued signal, absolute value and sum of absolute values.

615 //

616 // aecm [in] Pointer to the AECM instance (forwarded to WindowAndFFT)

617 // time_signal [in] Pointer to time domain signal

618 // freq_signal [out] Pointer to the complex frequency domain array

619 // (ComplexInt16 entries with .real and .imag)

620 // freq_signal_abs [out] Pointer to absolute value of frequency domain

621 // array

622 // freq_signal_sum_abs [out] Pointer to the sum of all absolute values in

623 // the frequency domain array

624 // return value The Q-domain of current frequency values

625 //

626 static int TimeToFrequencyDomain(AecmCore* aecm,

627 const int16_t* time_signal,

628 ComplexInt16* freq_signal,

629 uint16_t* freq_signal_abs,

630 uint32_t* freq_signal_sum_abs) {

631 int i = 0;

632 int time_signal_scaling = 0;

633

634 // In fft_buf, +16 for 32-byte alignment.

635 int16_t fft_buf[PART_LEN4 + 16];

// fft must be a pointer: it is the fft_buf address rounded up to the next
// multiple of 32, it is passed to WindowAndFFT() as the work buffer and it is
// indexed below.

636 int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~31);

637

638 int16_t tmp16no1;

639 #if !defined(MIPS_DSP_R2_LE)

640 int32_t tmp32no1;

641 int32_t tmp32no2;

642 int16_t tmp16no2;

643 #else

644 int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;

645 int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;

646 int16_t* freqp;

647 uint16_t* freqabsp;

648 uint32_t freqt0, freqt1, freqt2, freqt3;

649 uint32_t freqs;

650 #endif

651

// Without AECM_DYNAMIC_Q the scaling stays 0.

652 #ifdef AECM_DYNAMIC_Q

653 tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);

654 time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);

655 #endif

656

657 WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);

658

659 // Extract imaginary and real part,

660 // calculate the magnitude for all frequency bins

661 freq_signal[0].imag = 0;

662 freq_signal[PART_LEN].imag = 0;

663 freq_signal[PART_LEN].real = fft[PART_LEN2];

664 freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);

665 freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(

666 freq_signal[PART_LEN].real);

667 (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +

668 (uint32_t)(freq_signal_abs[PART_LEN]);

669

670 #if !defined(MIPS_DSP_R2_LE)

671 for (i = 1; i < PART_LEN; i++) {

672 if (freq_signal[i].real == 0)

673 {

674 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(

675 freq_signal[i].imag);

676 }

677 else if (freq_signal[i].imag == 0)

678 {

679 freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(

680 freq_signal[i].real);

681 }

682 else

683 {

684 // Magnitude of the complex fft output:

685 // magn = sqrt(real^2 + imag^2)

686 // computed with a saturating sum of squares followed by an integer

687 // square root. The alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)

688 // approximation is not used in this implementation.

689 tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);

690 tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);

691 tmp32no1 = tmp16no1 * tmp16no1;

692 tmp32no2 = tmp16no2 * tmp16no2;

693 tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);

694 tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);

695

696 freq_signal_abs[i] = (uint16_t)tmp32no1;

697 }

698 (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];

699 }

700 #else // #if !defined(MIPS_DSP_R2_LE)

701 freqs = (uint32_t)(freq_signal_abs[0]) +

702 (uint32_t)(freq_signal_abs[PART_LEN]);

703 freqp = &(freq_signal[1].real);

704

// Bins 1..3: each 32-bit load fetches one (real, imag) pair; the accumulators
// are cleared, dpaq_s.w.ph accumulates real^2 + imag^2 and extr.w extracts the
// accumulator shifted right by one bit.
// NOTE(review): per the MIPS DSP ASE dpaq_s.w.ph is a fractional dot product
// that doubles each product, and the shift by 1 appears to undo that --
// confirm.

705 __asm __volatile (

706 "lw %[freqt0], 0(%[freqp]) \n\t"

707 "lw %[freqt1], 4(%[freqp]) \n\t"

708 "lw %[freqt2], 8(%[freqp]) \n\t"

709 "mult $ac0, $zero, $zero \n\t"

710 "mult $ac1, $zero, $zero \n\t"

711 "mult $ac2, $zero, $zero \n\t"

712 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"

713 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"

714 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"

715 "addiu %[freqp], %[freqp], 12 \n\t"

716 "extr.w %[tmp32no20], $ac0, 1 \n\t"

717 "extr.w %[tmp32no21], $ac1, 1 \n\t"

718 "extr.w %[tmp32no22], $ac2, 1 \n\t"

719 : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),

720 [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),

721 [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),

722 [tmp32no22] "=r" (tmp32no22)

723 :

724 : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"

725 );

726

727 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);

728 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);

729 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);

730 freq_signal_abs[1] = (uint16_t)tmp32no10;

731 freq_signal_abs[2] = (uint16_t)tmp32no11;

732 freq_signal_abs[3] = (uint16_t)tmp32no12;

733 freqs += (uint32_t)tmp32no10;

734 freqs += (uint32_t)tmp32no11;

735 freqs += (uint32_t)tmp32no12;

736 freqabsp = &(freq_signal_abs[4]);

// Remaining bins, four per iteration, using the same scheme as above.
// freqabsp is advanced by 8 bytes inside the first asm block, so the second
// block stores the four magnitudes at negative offsets.

737 for (i = 4; i < PART_LEN; i+=4)

738 {

739 __asm __volatile (

740 "ulw %[freqt0], 0(%[freqp]) \n\t"

741 "ulw %[freqt1], 4(%[freqp]) \n\t"

742 "ulw %[freqt2], 8(%[freqp]) \n\t"

743 "ulw %[freqt3], 12(%[freqp]) \n\t"

744 "mult $ac0, $zero, $zero \n\t"

745 "mult $ac1, $zero, $zero \n\t"

746 "mult $ac2, $zero, $zero \n\t"

747 "mult $ac3, $zero, $zero \n\t"

748 "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"

749 "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"

750 "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"

751 "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"

752 "addiu %[freqp], %[freqp], 16 \n\t"

753 "addiu %[freqabsp], %[freqabsp], 8 \n\t"

754 "extr.w %[tmp32no20], $ac0, 1 \n\t"

755 "extr.w %[tmp32no21], $ac1, 1 \n\t"

756 "extr.w %[tmp32no22], $ac2, 1 \n\t"

757 "extr.w %[tmp32no23], $ac3, 1 \n\t"

758 : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),

759 [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),

760 [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),

761 [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),

762 [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)

763 :

764 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

765 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

766 );

767

768 tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);

769 tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);

770 tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);

771 tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);

772

773 __asm __volatile (

774 "sh %[tmp32no10], -8(%[freqabsp]) \n\t"

775 "sh %[tmp32no11], -6(%[freqabsp]) \n\t"

776 "sh %[tmp32no12], -4(%[freqabsp]) \n\t"

777 "sh %[tmp32no13], -2(%[freqabsp]) \n\t"

778 "addu %[freqs], %[freqs], %[tmp32no10] \n\t"

779 "addu %[freqs], %[freqs], %[tmp32no11] \n\t"

780 "addu %[freqs], %[freqs], %[tmp32no12] \n\t"

781 "addu %[freqs], %[freqs], %[tmp32no13] \n\t"

782 : [freqs] "+r" (freqs)

783 : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),

784 [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),

785 [freqabsp] "r" (freqabsp)

786 : "memory"

787 );

788 }

789

790 (*freq_signal_sum_abs) = freqs;

791 #endif

792

793 return time_signal_scaling;

794 }

795

796 int WebRtcAecm_ProcessBlock(AecmCore* aecm,

797 const int16_t* farend,

798 const int16_t* nearendNoisy,

799 const int16_t* nearendClean,

800 int16_t* output) {

801 int i;

802 uint32_t xfaSum;

803 uint32_t dfaNoisySum;

804 uint32_t dfaCleanSum;

805 uint32_t echoEst32Gained;

806 uint32_t tmpU32;

807 int32_t tmp32no1;

808

809 uint16_t xfa[PART_LEN1];

810 uint16_t dfaNoisy[PART_LEN1];

811 uint16_t dfaClean[PART_LEN1];

812 uint16_t* ptrDfaClean = dfaClean;

813 const uint16_t* far_spectrum_ptr = NULL;

814

815 // 32 byte aligned buffers (with +8 or +16).

816 int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.

817 int32_t echoEst32_buf[PART_LEN1 + 8];

818 int32_t dfw_buf[PART_LEN2 + 8];

819 int32_t efw_buf[PART_LEN2 + 8];

820

821 int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31);

822 int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31);

823 ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31);

824 ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31);

825

826 int16_t hnl[PART_LEN1];

827 int16_t numPosCoef = 0;

828 int delay;

829 int16_t tmp16no1;

830 int16_t tmp16no2;

831 int16_t mu;

832 int16_t supGain;

833 int16_t zeros32, zeros16;

834 int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;

835 int far_q;

836 int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;

837

838 const int kMinPrefBand = 4;

839 const int kMaxPrefBand = 24;

840 int32_t avgHnl32 = 0;

841

842 int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;

843 int16_t* ptr;

844 int16_t* ptr1;

845 int16_t* er_ptr;

846 int16_t* dr_ptr;

847

848 ptr = &hnl[0];

849 ptr1 = &hnl[0];

850 er_ptr = &efw[0].real;

851 dr_ptr = &dfw[0].real;

852

853 // Determine startup state. There are three states:

854 // (0) the first CONV_LEN blocks

855 // (1) another CONV_LEN blocks

856 // (2) the rest

857

858 if (aecm->startupState < 2) {

859 aecm->startupState = (aecm->totCount >= CONV_LEN) +

860 (aecm->totCount >= CONV_LEN2);

861 }

862 // END: Determine startup state

863

864 // Buffer near and far end signals

865 memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);

866 memcpy(aecm->dBufNoisy + PART_LEN,

867 nearendNoisy,

868 sizeof(int16_t) * PART_LEN);

869 if (nearendClean != NULL) {

870 memcpy(aecm->dBufClean + PART_LEN,

871 nearendClean,

872 sizeof(int16_t) * PART_LEN);

873 }

874

875 // Transform far end signal from time domain to frequency domain.

876 far_q = TimeToFrequencyDomain(aecm,

877 aecm->xBuf,

878 dfw,

879 xfa,

880 &xfaSum);

881

882 // Transform noisy near end signal from time domain to frequency domain.

883 zerosDBufNoisy = TimeToFrequencyDomain(aecm,

884 aecm->dBufNoisy,

885 dfw,

886 dfaNoisy,

887 &dfaNoisySum);

888 aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;

889 aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;

890

891 if (nearendClean == NULL) {

892 ptrDfaClean = dfaNoisy;

893 aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;

894 aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;

895 dfaCleanSum = dfaNoisySum;

896 } else {

897 // Transform clean near end signal from time domain to frequency domain.

898 zerosDBufClean = TimeToFrequencyDomain(aecm,

899 aecm->dBufClean,

900 dfw,

901 dfaClean,

902 &dfaCleanSum);

903 aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;

904 aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;

905 }

906

907 // Get the delay

908 // Save far-end history and estimate delay

909 WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);

910

911 if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,

912 far_q) == -1) {

913 return -1;

914 }

915 delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,

916 dfaNoisy,

917 PART_LEN1,

918 zerosDBufNoisy);

919 if (delay == -1) {

920 return -1;

921 }

922 else if (delay == -2) {

923 // If the delay is unknown, we assume zero.

924 // NOTE: this will have to be adjusted if we ever add lookahead.

925 delay = 0;

926 }

927

928 if (aecm->fixedDelay >= 0) {

929 // Use fixed delay

930 delay = aecm->fixedDelay;

931 }

932

933 // Get aligned far end spectrum

934 far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);

935 zerosXBuf = (int16_t) far_q;

936

937 if (far_spectrum_ptr == NULL) {

938 return -1;

939 }

940

941 // Calculate log(energy) and update energy threshold levels

942 WebRtcAecm_CalcEnergies(aecm,

943 far_spectrum_ptr,

944 zerosXBuf,

945 dfaNoisySum,

946 echoEst32);

947 // Calculate stepsize

948 mu = WebRtcAecm_CalcStepSize(aecm);

949

950 // Update counters

951 aecm->totCount++;

952

953 // This is the channel estimation algorithm.

954 // It is base on NLMS but has a variable step length,

955 // which was calculated above.

956 WebRtcAecm_UpdateChannel(aecm,

957 far_spectrum_ptr,

958 zerosXBuf,

959 dfaNoisy,

960 mu,

961 echoEst32);

962

963 supGain = WebRtcAecm_CalcSuppressionGain(aecm);

964

965 // Calculate Wiener filter hnl[]

966 for (i = 0; i < PART_LEN1; i++) {

967 // Far end signal through channel estimate in Q8

968 // How much can we shift right to preserve resolution

969 tmp32no1 = echoEst32[i] - aecm->echoFilt[i];

970 aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;

971

972 zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;

973 zeros16 = WebRtcSpl_NormW16(supGain) + 1;

974 if (zeros32 + zeros16 > 16) {

975 // Multiplication is safe

976 // Result in

977 // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])

978 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],

979 (uint16_t)supGain);

980 resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;

981 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);

982 } else {

983 tmp16no1 = 17 - zeros32 - zeros16;

984 resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -

985 RESOLUTION_SUPGAIN;

986 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);

987 if (zeros32 > tmp16no1) {

988 echoEst32Gained = WEBRTC_SPL_UMUL_32_16(

989 (uint32_t)aecm->echoFilt[i],

990 supGain >> tmp16no1);

991 } else {

992 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)

993 echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;

994 }

995 }

996

997 zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);

998 assert(zeros16 >= 0); // \|zeros16\| is a norm, hence non-negative.

999 dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;

1000 if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {

1001 tmp16no1 = aecm->nearFilt[i] << zeros16;

1002 qDomainDiff = zeros16 - dfa_clean_q_domain_diff;

1003 tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;

1004 } else {

1005 tmp16no1 = dfa_clean_q_domain_diff < 0

1006 ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff

1007 : aecm->nearFilt[i] << dfa_clean_q_domain_diff;

1008 qDomainDiff = 0;

1009 tmp16no2 = ptrDfaClean[i];

1010 }

1011

1012 tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);

1013 tmp16no2 = (int16_t)(tmp32no1 >> 4);

1014 tmp16no2 += tmp16no1;

1015 zeros16 = WebRtcSpl_NormW16(tmp16no2);

1016 if ((tmp16no2) & (-qDomainDiff > zeros16)) {

1017 aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;

1018 } else {

1019 aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff

1020 : tmp16no2 >> qDomainDiff;

1021 }

1022

1023 // Wiener filter coefficients, resulting hnl in Q14

1024 if (echoEst32Gained == 0) {

1025 hnl[i] = ONE_Q14;

1026 numPosCoef++;

1027 } else if (aecm->nearFilt[i] == 0) {

1028 hnl[i] = 0;

1029 } else {

1030 // Multiply the suppression gain

1031 // Rounding

1032 echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);

1033 tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,

1034 (uint16_t)aecm->nearFilt[i]);

1035

1036 // Current resolution is

1037 // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN

1038 // - max(0, 17 - zeros16 - zeros32))

1039 // Make sure we are in Q14

1040 tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);

1041 if (tmp32no1 > ONE_Q14) {

1042 hnl[i] = 0;

1043 } else if (tmp32no1 < 0) {

1044 hnl[i] = ONE_Q14;

1045 numPosCoef++;

1046 } else {

1047 // 1-echoEst/dfa

1048 hnl[i] = ONE_Q14 - (int16_t)tmp32no1;

1049 if (hnl[i] <= 0) {

1050 hnl[i] = 0;

1051 } else {

1052 numPosCoef++;

1053 }

1054 }

1055 }

1056 }

1057

1058 // Only in wideband. Prevent the gain in upper band from being larger than

1059 // in lower band.

1060 if (aecm->mult == 2) {

1061 // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause

1062 // speech distortion in double-talk.

1063 for (i = 0; i < (PART_LEN1 >> 3); i++) {

1064 __asm __volatile (

1065 "lh %[temp1], 0(%[ptr1]) \n\t"

1066 "lh %[temp2], 2(%[ptr1]) \n\t"

1067 "lh %[temp3], 4(%[ptr1]) \n\t"

1068 "lh %[temp4], 6(%[ptr1]) \n\t"

1069 "lh %[temp5], 8(%[ptr1]) \n\t"

1070 "lh %[temp6], 10(%[ptr1]) \n\t"

1071 "lh %[temp7], 12(%[ptr1]) \n\t"

1072 "lh %[temp8], 14(%[ptr1]) \n\t"

1073 "mul %[temp1], %[temp1], %[temp1] \n\t"

1074 "mul %[temp2], %[temp2], %[temp2] \n\t"

1075 "mul %[temp3], %[temp3], %[temp3] \n\t"

1076 "mul %[temp4], %[temp4], %[temp4] \n\t"

1077 "mul %[temp5], %[temp5], %[temp5] \n\t"

1078 "mul %[temp6], %[temp6], %[temp6] \n\t"

1079 "mul %[temp7], %[temp7], %[temp7] \n\t"

1080 "mul %[temp8], %[temp8], %[temp8] \n\t"

1081 "sra %[temp1], %[temp1], 14 \n\t"

1082 "sra %[temp2], %[temp2], 14 \n\t"

1083 "sra %[temp3], %[temp3], 14 \n\t"

1084 "sra %[temp4], %[temp4], 14 \n\t"

1085 "sra %[temp5], %[temp5], 14 \n\t"

1086 "sra %[temp6], %[temp6], 14 \n\t"

1087 "sra %[temp7], %[temp7], 14 \n\t"

1088 "sra %[temp8], %[temp8], 14 \n\t"

1089 "sh %[temp1], 0(%[ptr1]) \n\t"

1090 "sh %[temp2], 2(%[ptr1]) \n\t"

1091 "sh %[temp3], 4(%[ptr1]) \n\t"

1092 "sh %[temp4], 6(%[ptr1]) \n\t"

1093 "sh %[temp5], 8(%[ptr1]) \n\t"

1094 "sh %[temp6], 10(%[ptr1]) \n\t"

1095 "sh %[temp7], 12(%[ptr1]) \n\t"

1096 "sh %[temp8], 14(%[ptr1]) \n\t"

1097 "addiu %[ptr1], %[ptr1], 16 \n\t"

1098 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

1099 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),

1100 [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)

1101 :

1102 : "memory", "hi", "lo"

1103 );

1104 }

1105 for(i = 0; i < (PART_LEN1 & 7); i++) {

1106 __asm __volatile (

1107 "lh %[temp1], 0(%[ptr1]) \n\t"

1108 "mul %[temp1], %[temp1], %[temp1] \n\t"

1109 "sra %[temp1], %[temp1], 14 \n\t"

1110 "sh %[temp1], 0(%[ptr1]) \n\t"

1111 "addiu %[ptr1], %[ptr1], 2 \n\t"

1112 : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)

1113 :

1114 : "memory", "hi", "lo"

1115 );

1116 }

1117

1118 for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {

1119 avgHnl32 += (int32_t)hnl[i];

1120 }

1121

1122 assert(kMaxPrefBand - kMinPrefBand + 1 > 0);

1123 avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);

1124

1125 for (i = kMaxPrefBand; i < PART_LEN1; i++) {

1126 if (hnl[i] > (int16_t)avgHnl32) {

1127 hnl[i] = (int16_t)avgHnl32;

1128 }

1129 }

1130 }

1131

1132 // Calculate NLP gain, result is in Q14

1133 if (aecm->nlpFlag) {

1134 if (numPosCoef < 3) {

1135 for (i = 0; i < PART_LEN1; i++) {

1136 efw[i].real = 0;

1137 efw[i].imag = 0;

1138 hnl[i] = 0;

1139 }

1140 } else {

1141 for (i = 0; i < PART_LEN1; i++) {

1142 #if defined(MIPS_DSP_R1_LE)

1143 __asm __volatile (

1144 ".set push \n\t"

1145 ".set noreorder \n\t"

1146 "lh %[temp1], 0(%[ptr]) \n\t"

1147 "lh %[temp2], 0(%[dr_ptr]) \n\t"

1148 "slti %[temp4], %[temp1], 0x4001 \n\t"

1149 "beqz %[temp4], 3f \n\t"

1150 " lh %[temp3], 2(%[dr_ptr]) \n\t"

1151 "slti %[temp5], %[temp1], 3277 \n\t"

1152 "bnez %[temp5], 2f \n\t"

1153 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"

1154 "mul %[temp2], %[temp2], %[temp1] \n\t"

1155 "mul %[temp3], %[temp3], %[temp1] \n\t"

1156 "shra_r.w %[temp2], %[temp2], 14 \n\t"

1157 "shra_r.w %[temp3], %[temp3], 14 \n\t"

1158 "b 4f \n\t"

1159 " nop \n\t"

1160 "2: \n\t"

1161 "addu %[temp1], $zero, $zero \n\t"

1162 "addu %[temp2], $zero, $zero \n\t"

1163 "addu %[temp3], $zero, $zero \n\t"

1164 "b 1f \n\t"

1165 " nop \n\t"

1166 "3: \n\t"

1167 "addiu %[temp1], $0, 0x4000 \n\t"

1168 "1: \n\t"

1169 "sh %[temp1], 0(%[ptr]) \n\t"

1170 "4: \n\t"

1171 "sh %[temp2], 0(%[er_ptr]) \n\t"

1172 "sh %[temp3], 2(%[er_ptr]) \n\t"

1173 "addiu %[ptr], %[ptr], 2 \n\t"

1174 "addiu %[er_ptr], %[er_ptr], 4 \n\t"

1175 ".set pop \n\t"

1176 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

1177 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),

1178 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)

1179 :

1180 : "memory", "hi", "lo"

1181 );

1182 #else

1183 __asm __volatile (

1184 ".set push \n\t"

1185 ".set noreorder \n\t"

1186 "lh %[temp1], 0(%[ptr]) \n\t"

1187 "lh %[temp2], 0(%[dr_ptr]) \n\t"

1188 "slti %[temp4], %[temp1], 0x4001 \n\t"

1189 "beqz %[temp4], 3f \n\t"

1190 " lh %[temp3], 2(%[dr_ptr]) \n\t"

1191 "slti %[temp5], %[temp1], 3277 \n\t"

1192 "bnez %[temp5], 2f \n\t"

1193 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"

1194 "mul %[temp2], %[temp2], %[temp1] \n\t"

1195 "mul %[temp3], %[temp3], %[temp1] \n\t"

1196 "addiu %[temp2], %[temp2], 0x2000 \n\t"

1197 "addiu %[temp3], %[temp3], 0x2000 \n\t"

1198 "sra %[temp2], %[temp2], 14 \n\t"

1199 "sra %[temp3], %[temp3], 14 \n\t"

1200 "b 4f \n\t"

1201 " nop \n\t"

1202 "2: \n\t"

1203 "addu %[temp1], $zero, $zero \n\t"

1204 "addu %[temp2], $zero, $zero \n\t"

1205 "addu %[temp3], $zero, $zero \n\t"

1206 "b 1f \n\t"

1207 " nop \n\t"

1208 "3: \n\t"

1209 "addiu %[temp1], $0, 0x4000 \n\t"

1210 "1: \n\t"

1211 "sh %[temp1], 0(%[ptr]) \n\t"

1212 "4: \n\t"

1213 "sh %[temp2], 0(%[er_ptr]) \n\t"

1214 "sh %[temp3], 2(%[er_ptr]) \n\t"

1215 "addiu %[ptr], %[ptr], 2 \n\t"

1216 "addiu %[er_ptr], %[er_ptr], 4 \n\t"

1217 ".set pop \n\t"

1218 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

1219 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),

1220 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)

1221 :

1222 : "memory", "hi", "lo"

1223 );

1224 #endif

1225 }

1226 }

1227 }

1228 else {

1229 // multiply with Wiener coefficients

1230 for (i = 0; i < PART_LEN1; i++) {

1231 efw[i].real = (int16_t)

1232 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,

1233 hnl[i],

1234 14));

1235 efw[i].imag = (int16_t)

1236 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,

1237 hnl[i],

1238 14));

1239 }

1240 }

1241

1242 if (aecm->cngMode == AecmTrue) {

1243 ComfortNoise(aecm, ptrDfaClean, efw, hnl);

1244 }

1245

1246 InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);

1247

1248 return 0;

1249 }

1250

1251 // Generate comfort noise and add to output signal.

1252 static void ComfortNoise(AecmCore* aecm,

1253 const uint16_t* dfa,

1254 ComplexInt16* out,

1255 const int16_t* lambda) {

1256 int16_t i;

1257 int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;

1258 int32_t tmp32, tmp321, tnoise, tnoise1;

1259 int32_t tmp322, tmp323, *tmp1;

1260 int16_t* dfap;

1261 int16_t* lambdap;

1262 const int32_t c2049 = 2049;

1263 const int32_t c359 = 359;

1264 const int32_t c114 = ONE_Q14;

1265

1266 int16_t randW16[PART_LEN];

1267 int16_t uReal[PART_LEN1];

1268 int16_t uImag[PART_LEN1];

1269 int32_t outLShift32;

1270

1271 int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;

1272 int16_t minTrackShift = 9;

1273

1274 assert(shiftFromNearToNoise >= 0);

1275 assert(shiftFromNearToNoise < 16);

1276

1277 if (aecm->noiseEstCtr < 100) {

1278 // Track the minimum more quickly initially.

1279 aecm->noiseEstCtr++;

1280 minTrackShift = 6;

1281 }

1282

1283 // Generate a uniform random array on [0 2^15-1].

1284 WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);

1285 int16_t* randW16p = (int16_t*)randW16;

1286 #if defined (MIPS_DSP_R1_LE)

1287 int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;

1288 int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;

1289 #endif // #if defined(MIPS_DSP_R1_LE)

1290 tmp1 = (int32_t*)aecm->noiseEst + 1;

1291 dfap = (int16_t*)dfa + 1;

1292 lambdap = (int16_t*)lambda + 1;

1293 // Estimate noise power.

1294 for (i = 1; i < PART_LEN1; i+=2) {

1295 // Shift to the noise domain.

1296 __asm __volatile (

1297 "lh %[tmp32], 0(%[dfap]) \n\t"

1298 "lw %[tnoise], 0(%[tmp1]) \n\t"

1299 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"

1300 : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),

1301 [tnoise] "=&r" (tnoise)

1302 : [tmp1] "r" (tmp1), [dfap] "r" (dfap),

1303 [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

1304 : "memory"

1305 );

1306

1307 if (outLShift32 < tnoise) {

1308 // Reset "too low" counter

1309 aecm->noiseEstTooLowCtr[i] = 0;

1310 // Track the minimum.

1311 if (tnoise < (1 << minTrackShift)) {

1312 // For small values, decrease noiseEst[i] every

1313 // \|kNoiseEstIncCount\| block. The regular approach below can not

1314 // go further down due to truncation.

1315 aecm->noiseEstTooHighCtr[i]++;

1316 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {

1317 tnoise--;

1318 aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter

1319 }

1320 } else {

1321 __asm __volatile (

1322 "subu %[tmp32], %[tnoise], %[outLShift32] \n\t"

1323 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"

1324 "subu %[tnoise], %[tnoise], %[tmp32] \n\t"

1325 : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)

1326 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)

1327 );

1328 }

1329 } else {

1330 // Reset "too high" counter

1331 aecm->noiseEstTooHighCtr[i] = 0;

1332 // Ramp slowly upwards until we hit the minimum again.

1333 if ((tnoise >> 19) <= 0) {

1334 if ((tnoise >> 11) > 0) {

1335 // Large enough for relative increase

1336 __asm __volatile (

1337 "mul %[tnoise], %[tnoise], %[c2049] \n\t"

1338 "sra %[tnoise], %[tnoise], 11 \n\t"

1339 : [tnoise] "+r" (tnoise)

1340 : [c2049] "r" (c2049)

1341 : "hi", "lo"

1342 );

1343 } else {

1344 // Make incremental increases based on size every

1345 // \|kNoiseEstIncCount\| block

1346 aecm->noiseEstTooLowCtr[i]++;

1347 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {

1348 __asm __volatile (

1349 "sra %[tmp32], %[tnoise], 9 \n\t"

1350 "addi %[tnoise], %[tnoise], 1 \n\t"

1351 "addu %[tnoise], %[tnoise], %[tmp32] \n\t"

1352 : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)

1353 :

1354 );

1355 aecm->noiseEstTooLowCtr[i] = 0; // Reset counter

1356 }

1357 }

1358 } else {

1359 // Avoid overflow.

1360 // Multiplication with 2049 will cause wrap around. Scale

1361 // down first and then multiply

1362 __asm __volatile (

1363 "sra %[tnoise], %[tnoise], 11 \n\t"

1364 "mul %[tnoise], %[tnoise], %[c2049] \n\t"

1365 : [tnoise] "+r" (tnoise)

1366 : [c2049] "r" (c2049)

1367 : "hi", "lo"

1368 );

1369 }

1370 }

1371

1372 // Shift to the noise domain.

1373 __asm __volatile (

1374 "lh %[tmp32], 2(%[dfap]) \n\t"

1375 "lw %[tnoise1], 4(%[tmp1]) \n\t"

1376 "addiu %[dfap], %[dfap], 4 \n\t"

1377 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"

1378 : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),

1379 [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)

1380 : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

1381 : "memory"

1382 );

1383

1384 if (outLShift32 < tnoise1) {

1385 // Reset "too low" counter

1386 aecm->noiseEstTooLowCtr[i + 1] = 0;

1387 // Track the minimum.

1388 if (tnoise1 < (1 << minTrackShift)) {

1389 // For small values, decrease noiseEst[i] every

1390 // \|kNoiseEstIncCount\| block. The regular approach below can not

1391 // go further down due to truncation.

1392 aecm->noiseEstTooHighCtr[i + 1]++;

1393 if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {

1394 tnoise1--;

1395 aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter

1396 }

1397 } else {

1398 __asm __volatile (

1399 "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t"

1400 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"

1401 "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t"

1402 : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)

1403 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)

1404 );

1405 }

1406 } else {

1407 // Reset "too high" counter

1408 aecm->noiseEstTooHighCtr[i + 1] = 0;

1409 // Ramp slowly upwards until we hit the minimum again.

1410 if ((tnoise1 >> 19) <= 0) {

1411 if ((tnoise1 >> 11) > 0) {

1412 // Large enough for relative increase

1413 __asm __volatile (

1414 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"

1415 "sra %[tnoise1], %[tnoise1], 11 \n\t"

1416 : [tnoise1] "+r" (tnoise1)

1417 : [c2049] "r" (c2049)

1418 : "hi", "lo"

1419 );

1420 } else {

1421 // Make incremental increases based on size every

1422 // \|kNoiseEstIncCount\| block

1423 aecm->noiseEstTooLowCtr[i + 1]++;

1424 if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {

1425 __asm __volatile (

1426 "sra %[tmp32], %[tnoise1], 9 \n\t"

1427 "addi %[tnoise1], %[tnoise1], 1 \n\t"

1428 "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t"

1429 : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)

1430 :

1431 );

1432 aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter

1433 }

1434 }

1435 } else {

1436 // Avoid overflow.

1437 // Multiplication with 2049 will cause wrap around. Scale

1438 // down first and then multiply

1439 __asm __volatile (

1440 "sra %[tnoise1], %[tnoise1], 11 \n\t"

1441 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"

1442 : [tnoise1] "+r" (tnoise1)

1443 : [c2049] "r" (c2049)

1444 : "hi", "lo"

1445 );

1446 }

1447 }

1448

1449 __asm __volatile (

1450 "lh %[tmp16], 0(%[lambdap]) \n\t"

1451 "lh %[tmp161], 2(%[lambdap]) \n\t"

1452 "sw %[tnoise], 0(%[tmp1]) \n\t"

1453 "sw %[tnoise1], 4(%[tmp1]) \n\t"

1454 "subu %[tmp16], %[c114], %[tmp16] \n\t"

1455 "subu %[tmp161], %[c114], %[tmp161] \n\t"

1456 "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t"

1457 "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t"

1458 "addiu %[lambdap], %[lambdap], 4 \n\t"

1459 "addiu %[tmp1], %[tmp1], 8 \n\t"

1460 : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),

1461 [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)

1462 : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),

1463 [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

1464 : "memory"

1465 );

1466

1467 if (tmp32 > 32767) {

1468 tmp32 = 32767;

1469 aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;

1470 }

1471 if (tmp321 > 32767) {

1472 tmp321 = 32767;

1473 aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise;

1474 }

1475

1476 __asm __volatile (

1477 "mul %[tmp32], %[tmp32], %[tmp16] \n\t"

1478 "mul %[tmp321], %[tmp321], %[tmp161] \n\t"

1479 "sra %[nrsh1], %[tmp32], 14 \n\t"

1480 "sra %[nrsh2], %[tmp321], 14 \n\t"

1481 : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2)

1482 : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32),

1483 [tmp321] "r" (tmp321)

1484 : "memory", "hi", "lo"

1485 );

1486

1487 __asm __volatile (

1488 "lh %[tmp32], 0(%[randW16p]) \n\t"

1489 "lh %[tmp321], 2(%[randW16p]) \n\t"

1490 "addiu %[randW16p], %[randW16p], 4 \n\t"

1491 "mul %[tmp32], %[tmp32], %[c359] \n\t"

1492 "mul %[tmp321], %[tmp321], %[c359] \n\t"

1493 "sra %[tmp16], %[tmp32], 15 \n\t"

1494 "sra %[tmp161], %[tmp321], 15 \n\t"

1495 : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),

1496 [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)

1497 : [c359] "r" (c359)

1498 : "memory", "hi", "lo"

1499 );

1500

1501 #if !defined(MIPS_DSP_R1_LE)

1502 tmp32 = WebRtcAecm_kCosTable[tmp16];

1503 tmp321 = WebRtcAecm_kSinTable[tmp16];

1504 tmp322 = WebRtcAecm_kCosTable[tmp161];

1505 tmp323 = WebRtcAecm_kSinTable[tmp161];

1506 #else

1507 __asm __volatile (

1508 "sll %[tmp16], %[tmp16], 1 \n\t"

1509 "sll %[tmp161], %[tmp161], 1 \n\t"

1510 "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"

1511 "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"

1512 "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"

1513 "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"

1514 : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),

1515 [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323)

1516 : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16),

1517 [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep)

1518 : "memory"

1519 );

1520 #endif

1521 __asm __volatile (

1522 "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"

1523 "negu %[tmp162], %[nrsh1] \n\t"

1524 "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"

1525 "negu %[tmp163], %[nrsh2] \n\t"

1526 "sra %[tmp32], %[tmp32], 13 \n\t"

1527 "mul %[tmp321], %[tmp321], %[tmp162] \n\t"

1528 "sra %[tmp322], %[tmp322], 13 \n\t"

1529 "mul %[tmp323], %[tmp323], %[tmp163] \n\t"

1530 "sra %[tmp321], %[tmp321], 13 \n\t"

1531 "sra %[tmp323], %[tmp323], 13 \n\t"

1532 : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),

1533 [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)

1534 : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)

1535 : "hi", "lo"

1536 );

1537 // Tables are in Q13.

1538 uReal[i] = (int16_t)tmp32;

1539 uImag[i] = (int16_t)tmp321;

1540 uReal[i + 1] = (int16_t)tmp322;

1541 uImag[i + 1] = (int16_t)tmp323;

1542 }

1543

1544 int32_t tt, sgn;

1545 tt = out[0].real;

1546 sgn = ((int)tt) >> 31;

1547 out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1548 tt = out[0].imag;

1549 sgn = ((int)tt) >> 31;

1550 out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1551 for (i = 1; i < PART_LEN; i++) {

1552 tt = out[i].real + uReal[i];

1553 sgn = ((int)tt) >> 31;

1554 out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1555 tt = out[i].imag + uImag[i];

1556 sgn = ((int)tt) >> 31;

1557 out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1558 }

1559 tt = out[PART_LEN].real + uReal[PART_LEN];

1560 sgn = ((int)tt) >> 31;

1561 out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1562 tt = out[PART_LEN].imag;

1563 sgn = ((int)tt) >> 31;

1564 out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

1565 }

1566

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aecm/aecm_core_c.cc ('k') | webrtc/modules/audio_processing/aecm/aecm_core_mips.cc » ('j') | no next file with comments »