OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
367 exp2_y = vmlaq_f32(C1, y, exp2_y); | 367 exp2_y = vmlaq_f32(C1, y, exp2_y); |
368 exp2_y = vmlaq_f32(C0, y, exp2_y); | 368 exp2_y = vmlaq_f32(C0, y, exp2_y); |
369 | 369 |
370 // Combine parts. | 370 // Combine parts. |
371 a_exp_b = vmulq_f32(exp2_y, two_n); | 371 a_exp_b = vmulq_f32(exp2_y, two_n); |
372 } | 372 } |
373 | 373 |
374 return a_exp_b; | 374 return a_exp_b; |
375 } | 375 } |
376 | 376 |
377 static void OverdriveAndSuppressNEON(AecCore* aec, | 377 static void OverdriveAndSuppressNEON(float overdrive_scaling, |
378 float hNl[PART_LEN1], | 378 float hNl[PART_LEN1], |
379 const float hNlFb, | 379 const float hNlFb, |
380 float efw[2][PART_LEN1]) { | 380 float efw[2][PART_LEN1]) { |
381 int i; | 381 int i; |
382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); | 382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); |
383 const float32x4_t vec_one = vdupq_n_f32(1.0f); | 383 const float32x4_t vec_one = vdupq_n_f32(1.0f); |
384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); | 384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); |
385 const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm); | 385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); |
386 | 386 |
387 // vectorized code (four at once) | 387 // vectorized code (four at once) |
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 388 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
389 // Weight subbands | 389 // Weight subbands |
390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); | 390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); |
391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); | 391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); |
392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); | 392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); |
393 const float32x4_t vec_weightCurve_hNlFb = | 393 const float32x4_t vec_weightCurve_hNlFb = |
394 vmulq_f32(vec_weightCurve, vec_hNlFb); | 394 vmulq_f32(vec_weightCurve, vec_hNlFb); |
395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); | 395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); |
396 const float32x4_t vec_one_weightCurve_hNl = | 396 const float32x4_t vec_one_weightCurve_hNl = |
397 vmulq_f32(vec_one_weightCurve, vec_hNl); | 397 vmulq_f32(vec_one_weightCurve, vec_hNl); |
398 const uint32x4_t vec_if0 = | 398 const uint32x4_t vec_if0 = |
399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); | 399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); |
400 const float32x4_t vec_one_weightCurve_add = | 400 const float32x4_t vec_one_weightCurve_add = |
401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); | 401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); |
402 const uint32x4_t vec_if1 = | 402 const uint32x4_t vec_if1 = |
403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); | 403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); |
404 | 404 |
405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); | 405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); |
406 | 406 |
407 { | 407 { |
408 const float32x4_t vec_overDriveCurve = | 408 const float32x4_t vec_overDriveCurve = |
409 vld1q_f32(&WebRtcAec_overDriveCurve[i]); | 409 vld1q_f32(&WebRtcAec_overDriveCurve[i]); |
410 const float32x4_t vec_overDriveSm_overDriveCurve = | 410 const float32x4_t vec_overDriveSm_overDriveCurve = |
411 vmulq_f32(vec_overDriveSm, vec_overDriveCurve); | 411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); |
412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); | 412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); |
413 vst1q_f32(&hNl[i], vec_hNl); | 413 vst1q_f32(&hNl[i], vec_hNl); |
414 } | 414 } |
415 | 415 |
416 // Suppress error signal | 416 // Suppress error signal |
417 { | 417 { |
418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); | 418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); |
419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); | 419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); |
420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); | 420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); |
421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); | 421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); |
422 | 422 |
423 // Ooura fft returns incorrect sign on imaginary component. It matters | 423 // Ooura fft returns incorrect sign on imaginary component. It matters |
424 // here because we are making an additive change with comfort noise. | 424 // here because we are making an additive change with comfort noise. |
425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); | 425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); |
426 vst1q_f32(&efw[0][i], vec_efw_re); | 426 vst1q_f32(&efw[0][i], vec_efw_re); |
427 vst1q_f32(&efw[1][i], vec_efw_im); | 427 vst1q_f32(&efw[1][i], vec_efw_im); |
428 } | 428 } |
429 } | 429 } |
430 | 430 |
431 // scalar code for the remaining items. | 431 // scalar code for the remaining items. |
432 for (; i < PART_LEN1; i++) { | 432 for (; i < PART_LEN1; i++) { |
433 // Weight subbands | 433 // Weight subbands |
434 if (hNl[i] > hNlFb) { | 434 if (hNl[i] > hNlFb) { |
435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + |
436 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 436 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; |
437 } | 437 } |
438 | 438 |
439 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | 439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); |
440 | 440 |
441 // Suppress error signal | 441 // Suppress error signal |
442 efw[0][i] *= hNl[i]; | 442 efw[0][i] *= hNl[i]; |
443 efw[1][i] *= hNl[i]; | 443 efw[1][i] *= hNl[i]; |
444 | 444 |
445 // Ooura fft returns incorrect sign on imaginary component. It matters | 445 // Ooura fft returns incorrect sign on imaginary component. It matters |
446 // here because we are making an additive change with comfort noise. | 446 // here because we are making an additive change with comfort noise. |
447 efw[1][i] *= -1; | 447 efw[1][i] *= -1; |
448 } | 448 } |
449 } | 449 } |
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
717 WebRtcAec_FilterFar = FilterFarNEON; | 717 WebRtcAec_FilterFar = FilterFarNEON; |
718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
723 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 723 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
724 WebRtcAec_WindowData = WindowDataNEON; | 724 WebRtcAec_WindowData = WindowDataNEON; |
725 } | 725 } |
726 } // namespace webrtc | 726 } // namespace webrtc |
OLD | NEW |