webrtc/modules/audio_processing/aec/aec_core_mips.cc - Issue 1942853002: Removed the MIPS optimized code for the comfort noise generation in the AEC.

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_mips.cc

Issue 1942853002: Removed the MIPS optimized code for the comfort noise generation in the AEC. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec2_CL

Patch Set: Rebase Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 /*	11 /*

12 * The core AEC algorithm, which is presented with time-aligned signals.	12 * The core AEC algorithm, which is presented with time-aligned signals.

13 */	13 */

14	14

15 #include "webrtc/modules/audio_processing/aec/aec_core.h"	15 #include "webrtc/modules/audio_processing/aec/aec_core.h"

16	16

17 #include <math.h>	17 #include <math.h>

18	18

19 extern "C" {	19 extern "C" {

20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

21 }	21 }

22 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"	22 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"

23 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"	23 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"

24	24

25 namespace webrtc {	25 namespace webrtc {

26	26

27 extern const float WebRtcAec_weightCurve[65];	27 extern const float WebRtcAec_weightCurve[65];

28 extern const float WebRtcAec_overDriveCurve[65];	28 extern const float WebRtcAec_overDriveCurve[65];

29	29

30 void WebRtcAec_ComfortNoise_mips(AecCore* aec,

31 float efw[2][PART_LEN1],

32 float comfortNoiseHband[2][PART_LEN1],

33 const float* noisePow,

34 const float* lambda) {

35 int i, num;

36 float rand[PART_LEN];

37 float noise, noiseAvg, tmp, tmpAvg;

38 int16_t randW16[PART_LEN];

39 complex_t u[PART_LEN1];

40

41 const float pi2 = 6.28318530717959f;

42 const float pi2t = pi2 / 32768;

43

44 // Generate a uniform random array on [0 1]

45 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);

46

47 int16_t* randWptr = randW16;

48 float randTemp, randTemp2, randTemp3, randTemp4;

49 int32_t tmp1s, tmp2s, tmp3s, tmp4s;

50

51 for (i = 0; i < PART_LEN; i += 4) {

52 __asm __volatile(

53 ".set push \n\t"

54 ".set noreorder \n\t"

55 "lh %[tmp1s], 0(%[randWptr]) \n\t"

56 "lh %[tmp2s], 2(%[randWptr]) \n\t"

57 "lh %[tmp3s], 4(%[randWptr]) \n\t"

58 "lh %[tmp4s], 6(%[randWptr]) \n\t"

59 "mtc1 %[tmp1s], %[randTemp] \n\t"

60 "mtc1 %[tmp2s], %[randTemp2] \n\t"

61 "mtc1 %[tmp3s], %[randTemp3] \n\t"

62 "mtc1 %[tmp4s], %[randTemp4] \n\t"

63 "cvt.s.w %[randTemp], %[randTemp] \n\t"

64 "cvt.s.w %[randTemp2], %[randTemp2] \n\t"

65 "cvt.s.w %[randTemp3], %[randTemp3] \n\t"

66 "cvt.s.w %[randTemp4], %[randTemp4] \n\t"

67 "addiu %[randWptr], %[randWptr], 8 \n\t"

68 "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"

69 "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"

70 "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"

71 "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"

72 ".set pop \n\t"

73 : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),

74 [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),

75 [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),

76 [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),

77 [tmp4s] "=&r" (tmp4s)

78 : [pi2t] "f" (pi2t)

79 : "memory");

80

81 u[i + 1][0] = cosf(randTemp);

82 u[i + 1][1] = sinf(randTemp);

83 u[i + 2][0] = cosf(randTemp2);

84 u[i + 2][1] = sinf(randTemp2);

85 u[i + 3][0] = cosf(randTemp3);

86 u[i + 3][1] = sinf(randTemp3);

87 u[i + 4][0] = cosf(randTemp4);

88 u[i + 4][1] = sinf(randTemp4);

89 }

90

91 // Reject LF noise

92 float* u_ptr = &u[1][0];

93 float noise2, noise3, noise4;

94 float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;

95

96 u[0][0] = 0;

97 u[0][1] = 0;

98 for (i = 1; i < PART_LEN1; i += 4) {

99 __asm __volatile(

100 ".set push \n\t"

101 ".set noreorder \n\t"

102 "lwc1 %[noise], 4(%[noisePow]) \n\t"

103 "lwc1 %[noise2], 8(%[noisePow]) \n\t"

104 "lwc1 %[noise3], 12(%[noisePow]) \n\t"

105 "lwc1 %[noise4], 16(%[noisePow]) \n\t"

106 "sqrt.s %[noise], %[noise] \n\t"

107 "sqrt.s %[noise2], %[noise2] \n\t"

108 "sqrt.s %[noise3], %[noise3] \n\t"

109 "sqrt.s %[noise4], %[noise4] \n\t"

110 "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"

111 "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"

112 "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"

113 "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"

114 "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"

115 "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"

116 "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"

117 "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"

118 "addiu %[noisePow], %[noisePow], 16 \n\t"

119 "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"

120 "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"

121 "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"

122 "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"

123 "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"

124 "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"

125 "swc1 %[tmp1f], 0(%[u_ptr]) \n\t"

126 "swc1 %[tmp3f], 8(%[u_ptr]) \n\t"

127 "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"

128 "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"

129 "neg.s %[tmp2f] \n\t"

130 "neg.s %[tmp4f] \n\t"

131 "neg.s %[tmp6f] \n\t"

132 "neg.s %[tmp8f] \n\t"

133 "swc1 %[tmp5f], 16(%[u_ptr]) \n\t"

134 "swc1 %[tmp7f], 24(%[u_ptr]) \n\t"

135 "swc1 %[tmp2f], 4(%[u_ptr]) \n\t"

136 "swc1 %[tmp4f], 12(%[u_ptr]) \n\t"

137 "swc1 %[tmp6f], 20(%[u_ptr]) \n\t"

138 "swc1 %[tmp8f], 28(%[u_ptr]) \n\t"

139 "addiu %[u_ptr], %[u_ptr], 32 \n\t"

140 ".set pop \n\t"

141 : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),

142 [noise] "=&f" (noise), [noise2] "=&f" (noise2),

143 [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),

144 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),

145 [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),

146 [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),

147 [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)

148 :

149 : "memory");

150 }

151 u[PART_LEN][1] = 0;

152 noisePow -= PART_LEN;

153

154 u_ptr = &u[0][0];

155 float* u_ptr_end = &u[PART_LEN][0];

156 float* efw_ptr_0 = &efw[0][0];

157 float* efw_ptr_1 = &efw[1][0];

158 float tmp9f, tmp10f;

159 const float tmp1c = 1.0;

160

161 __asm __volatile(

162 ".set push \n\t"

163 ".set noreorder \n\t"

164 "1: \n\t"

165 "lwc1 %[tmp1f], 0(%[lambda]) \n\t"

166 "lwc1 %[tmp6f], 4(%[lambda]) \n\t"

167 "addiu %[lambda], %[lambda], 8 \n\t"

168 "c.lt.s %[tmp1f], %[tmp1c] \n\t"

169 "bc1f 4f \n\t"

170 " nop \n\t"

171 "c.lt.s %[tmp6f], %[tmp1c] \n\t"

172 "bc1f 3f \n\t"

173 " nop \n\t"

174 "2: \n\t"

175 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"

176 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"

177 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"

178 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"

179 "sqrt.s %[tmp1f], %[tmp1f] \n\t"

180 "sqrt.s %[tmp6f], %[tmp6f] \n\t"

181 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"

182 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"

183 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"

184 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"

185 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"

186 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"

187 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"

188 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"

189 #if !defined(MIPS32_R2_LE)

190 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"

191 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"

192 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"

193 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"

194 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"

195 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"

196 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"

197 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"

198 #else // #if !defined(MIPS32_R2_LE)

199 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"

200 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"

201 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"

202 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"

203 #endif // #if !defined(MIPS32_R2_LE)

204 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"

205 "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"

206 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"

207 "b 5f \n\t"

208 " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"

209 "3: \n\t"

210 "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"

211 "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"

212 "sqrt.s %[tmp1f], %[tmp1f] \n\t"

213 "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"

214 "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"

215 "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"

216 "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"

217 #if !defined(MIPS32_R2_LE)

218 "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"

219 "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"

220 "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"

221 "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"

222 #else // #if !defined(MIPS32_R2_LE)

223 "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"

224 "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"

225 #endif // #if !defined(MIPS32_R2_LE)

226 "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"

227 "b 5f \n\t"

228 " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"

229 "4: \n\t"

230 "c.lt.s %[tmp6f], %[tmp1c] \n\t"

231 "bc1f 5f \n\t"

232 " nop \n\t"

233 "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"

234 "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"

235 "sqrt.s %[tmp6f], %[tmp6f] \n\t"

236 "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"

237 "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"

238 "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"

239 "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"

240 #if !defined(MIPS32_R2_LE)

241 "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"

242 "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"

243 "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"

244 "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"

245 #else // #if !defined(MIPS32_R2_LE)

246 "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"

247 "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"

248 #endif // #if !defined(MIPS32_R2_LE)

249 "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"

250 "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"

251 "5: \n\t"

252 "addiu %[u_ptr], %[u_ptr], 16 \n\t"

253 "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"

254 "bne %[u_ptr], %[u_ptr_end], 1b \n\t"

255 " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"

256 ".set pop \n\t"

257 : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),

258 [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),

259 [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),

260 [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),

261 [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),

262 [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)

263 : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)

264 : "memory");

265

266 lambda -= PART_LEN;

267 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));

268 // tmp = 1 - lambda[i];

269 efw[0][PART_LEN] += tmp * u[PART_LEN][0];

270 efw[1][PART_LEN] += tmp * u[PART_LEN][1];

271

272 // For H band comfort noise

273 // TODO(peah): don't compute noise and "tmp" twice. Use the previous results.

274 noiseAvg = 0.0;

275 tmpAvg = 0.0;

276 num = 0;

277 if (aec->num_bands > 1) {

278 for (i = 0; i < PART_LEN; i++) {

279 rand[i] = (static_cast<float>(randW16[i])) / 32768;

280 }

281

282 // average noise scale

283 // average over second half of freq spectrum (i.e., 4->8khz)

284 // TODO(peah): we shouldn't need num. We know how many elements we're

285 // summing.

286 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {

287 num++;

288 noiseAvg += sqrtf(noisePow[i]);

289 }

290 noiseAvg /= static_cast<float>(num);

291

292 // average nlp scale

293 // average over second half of freq spectrum (i.e., 4->8khz)

294 // TODO(peah): we shouldn't need num. We know how many elements we're

295 // summing.

296 num = 0;

297 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {

298 num++;

299 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));

300 }

301 tmpAvg /= static_cast<float>(num);

302

303 // Use average noise for H band

304 // TODO(peah): we should probably have a new random vector here.

305 // Reject LF noise

306 u[0][0] = 0;

307 u[0][1] = 0;

308 for (i = 1; i < PART_LEN1; i++) {

309 tmp = pi2 * rand[i - 1];

310

311 // Use average noise for H band

312 u[i][0] = noiseAvg * static_cast<float>(cos(tmp));

313 u[i][1] = -noiseAvg * static_cast<float>(sin(tmp));

314 }

315 u[PART_LEN][1] = 0;

316

317 for (i = 0; i < PART_LEN1; i++) {

318 // Use average NLP weight for H band

319 comfortNoiseHband[0][i] = tmpAvg * u[i][0];

320 comfortNoiseHband[1][i] = tmpAvg * u[i][1];

321 }

322 } else {

323 memset(comfortNoiseHband, 0,

324 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));

325 }

326 }

327

328 void WebRtcAec_FilterFar_mips(	30 void WebRtcAec_FilterFar_mips(

329 int num_partitions,	31 int num_partitions,

330 int x_fft_buf_block_pos,	32 int x_fft_buf_block_pos,

331 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],	33 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],

332 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],	34 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],

333 float y_fft[2][PART_LEN1]) {	35 float y_fft[2][PART_LEN1]) {

334 int i;	36 int i;

335 for (i = 0; i < num_partitions; i++) {	37 for (i = 0; i < num_partitions; i++) {

336 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;	38 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;

337 int pos = i * PART_LEN1;	39 int pos = i * PART_LEN1;

(...skipping 428 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
766 [len] "+r" (len)	468 [len] "+r" (len)

767 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),	469 : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),

768 [err_th] "f" (error_threshold)	470 [err_th] "f" (error_threshold)

769 : "memory");	471 : "memory");

770 }	472 }

771	473

772 void WebRtcAec_InitAec_mips(void) {	474 void WebRtcAec_InitAec_mips(void) {

773 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;	475 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;

774 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;	476 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;

775 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;	477 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;

776 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;

777 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;	478 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;

778 }	479 }

779 } // namespace webrtc	480 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | no next file » | no next file with comments »