OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 | 495 |
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined and reported through |filter_divergence_state| and
// |extreme_filter_divergence|.
//
// |mult| selects the smoothing-coefficient row for the current sample-rate
// multiplier; |extended_filter_enabled| chooses between the extended and
// normal coefficient tables.
static void SmoothedPSD(int mult,
                        bool extended_filter_enabled,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1],
                        CoherenceState* coherence_state,
                        short* filter_divergence_state,
                        int* extreme_filter_divergence) {
  // Power estimate smoothing coefficients.
  const float* ptrGCoh =
      extended_filter_enabled
          ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
          : WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
  int i;
  float sdSum = 0, seSum = 0;
  const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
  float32x4_t vec_seSum = vdupq_n_f32(0.0f);

  // Vectorized loop: four frequency bins per iteration.
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
    // Start each smoothed PSD as gCoh[0] * previous value; the new
    // instantaneous power is blended in below with gCoh[1].
    float32x4_t vec_sd =
        vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]);
    float32x4_t vec_se =
        vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]);
    float32x4_t vec_sx =
        vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]);
    // Magnitude squared per bin: real^2 + imag^2.
    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);

    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
    // Clamp far-end power to WebRtcAec_kMinFarendPSD (see the comment in the
    // scalar tail loop for the rationale).
    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);

    vst1q_f32(&coherence_state->sd[i], vec_sd);
    vst1q_f32(&coherence_state->se[i], vec_se);
    vst1q_f32(&coherence_state->sx[i], vec_sx);

    // Cross-PSD of near-end and residual echo: d * conj(e), smoothed.
    // vld2q/vst2q de-interleave the [real, imag] pairs into two registers.
    {
      float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
      vst2q_f32(&coherence_state->sde[i][0], vec_sde);
    }

    // Cross-PSD of near-end and far-end: d * conj(x), smoothed.
    {
      float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
      vst2q_f32(&coherence_state->sxd[i][0], vec_sxd);
    }

    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
    vec_seSum = vaddq_f32(vec_seSum, vec_se);
  }
  // Horizontal reduction of the four-lane accumulators into scalars.
  {
    float32x2_t vec_sdSum_total;
    float32x2_t vec_seSum_total;
    // A B C D
    vec_sdSum_total =
        vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum));
    vec_seSum_total =
        vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum));
    // A+B C+D
    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
    // A+B+C+D A+B+C+D
    sdSum = vget_lane_f32(vec_sdSum_total, 0);
    seSum = vget_lane_f32(vec_seSum_total, 0);
  }

  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    coherence_state->sd[i] =
        ptrGCoh[0] * coherence_state->sd[i] +
        ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    coherence_state->se[i] =
        ptrGCoh[0] * coherence_state->se[i] +
        ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    coherence_state->sx[i] =
        ptrGCoh[0] * coherence_state->sx[i] +
        ptrGCoh[1] *
            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
                           WebRtcAec_kMinFarendPSD);

    coherence_state->sde[i][0] =
        ptrGCoh[0] * coherence_state->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    coherence_state->sde[i][1] =
        ptrGCoh[0] * coherence_state->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    coherence_state->sxd[i][0] =
        ptrGCoh[0] * coherence_state->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    coherence_state->sxd[i][1] =
        ptrGCoh[0] * coherence_state->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += coherence_state->sd[i];
    seSum += coherence_state->se[i];
  }

  // Divergent filter safeguard update. Hysteresis: once diverged, the echo
  // power must drop ~5% below the near-end power before the state clears.
  *filter_divergence_state =
      (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;

  // Signal extreme filter divergence if the error is significantly larger
  // than the nearend (13 dB).
  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
}
632 | 642 |
633 // Window time domain data to be used by the fft. | 643 // Window time domain data to be used by the fft. |
634 static void WindowDataNEON(float* x_windowed, const float* x) { | 644 static void WindowDataNEON(float* x_windowed, const float* x) { |
635 int i; | 645 int i; |
636 for (i = 0; i < PART_LEN; i += 4) { | 646 for (i = 0; i < PART_LEN; i += 4) { |
(...skipping 23 matching lines...) Expand all Loading... |
660 vst1q_f32(&data_complex[0][i], vec_data.val[0]); | 670 vst1q_f32(&data_complex[0][i], vec_data.val[0]); |
661 vst1q_f32(&data_complex[1][i], vec_data.val[1]); | 671 vst1q_f32(&data_complex[1][i], vec_data.val[1]); |
662 } | 672 } |
663 // fix beginning/end values | 673 // fix beginning/end values |
664 data_complex[1][0] = 0; | 674 data_complex[1][0] = 0; |
665 data_complex[1][PART_LEN] = 0; | 675 data_complex[1][PART_LEN] = 0; |
666 data_complex[0][0] = data[0]; | 676 data_complex[0][0] = data[0]; |
667 data_complex[0][PART_LEN] = data[1]; | 677 data_complex[0][PART_LEN] = data[1]; |
668 } | 678 } |
669 | 679 |
// Computes the subband coherence between near-end and residual echo
// (|cohde|) and between near-end and far-end (|cohxd|), one value per
// frequency bin. First refreshes the smoothed (cross-)PSDs via SmoothedPSD
// (which also updates |filter_divergence_state| and
// |extreme_filter_divergence|), then evaluates
//   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10)
//   cohxd[i] = |sxd[i]|^2 / (sx[i] * sd[i] + 1e-10)
// The 1e-10 bias guards against division by zero.
// NOTE(review): |fft| is accepted but not used in this NEON variant —
// presumably kept for signature compatibility with the generic dispatch
// pointer; confirm against WebRtcAec_SubbandCoherence's other backends.
static void SubbandCoherenceNEON(int mult,
                                 bool extended_filter_enabled,
                                 float efw[2][PART_LEN1],
                                 float dfw[2][PART_LEN1],
                                 float xfw[2][PART_LEN1],
                                 float* fft,
                                 float* cohde,
                                 float* cohxd,
                                 CoherenceState* coherence_state,
                                 short* filter_divergence_state,
                                 int* extreme_filter_divergence) {
  int i;

  SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
              filter_divergence_state, extreme_filter_divergence);

  {
    const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);

    // Subband coherence, four bins per iteration.
    for (i = 0; i + 3 < PART_LEN1; i += 4) {
      const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]);
      const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]);
      const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]);
      // Denominators with the 1e-10 bias folded in via fused multiply-add.
      const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
      const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
      // De-interleave [real, imag] cross-PSD pairs.
      float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
      float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
      // |sde|^2 and |sxd|^2: real^2 + imag^2.
      float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
      float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
      vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
      vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
      vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
      vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);

      vst1q_f32(&cohde[i], vec_cohde);
      vst1q_f32(&cohxd[i], vec_cohxd);
    }
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
                coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
               (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
    cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
                coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
               (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
  }
}
715 | 729 |
716 void WebRtcAec_InitAec_neon(void) { | 730 void WebRtcAec_InitAec_neon(void) { |
717 WebRtcAec_FilterFar = FilterFarNEON; | 731 WebRtcAec_FilterFar = FilterFarNEON; |
718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
723 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 737 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
724 WebRtcAec_WindowData = WindowDataNEON; | 738 WebRtcAec_WindowData = WindowDataNEON; |
725 } | 739 } |
726 } // namespace webrtc | 740 } // namespace webrtc |
OLD | NEW |