| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 495 | 495 |
| 496 // Updates the following smoothed Power Spectral Densities (PSD): | 496 // Updates the following smoothed Power Spectral Densities (PSD): |
| 497 // - sd : near-end | 497 // - sd : near-end |
| 498 // - se : residual echo | 498 // - se : residual echo |
| 499 // - sx : far-end | 499 // - sx : far-end |
| 500 // - sde : cross-PSD of near-end and residual echo | 500 // - sde : cross-PSD of near-end and residual echo |
| 501 // - sxd : cross-PSD of near-end and far-end | 501 // - sxd : cross-PSD of near-end and far-end |
| 502 // | 502 // |
| 503 // In addition to updating the PSDs, the filter divergence state is also | 503 // In addition to updating the PSDs, the filter divergence state is also |
| 504 // determined, upon which actions are taken. | 504 // determined, upon which actions are taken. |
| 505 static void SmoothedPSD(AecCore* aec, | 505 static void SmoothedPSD(int mult, |
| 506 bool extended_filter_enabled, |
| 506 float efw[2][PART_LEN1], | 507 float efw[2][PART_LEN1], |
| 507 float dfw[2][PART_LEN1], | 508 float dfw[2][PART_LEN1], |
| 508 float xfw[2][PART_LEN1], | 509 float xfw[2][PART_LEN1], |
| 510 CoherenceState* coherence_state, |
| 511 short* filter_divergence_state, |
| 509 int* extreme_filter_divergence) { | 512 int* extreme_filter_divergence) { |
| 510 // Power estimate smoothing coefficients. | 513 // Power estimate smoothing coefficients. |
| 511 const float* ptrGCoh = | 514 const float* ptrGCoh = |
| 512 aec->extended_filter_enabled | 515 extended_filter_enabled |
| 513 ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] | 516 ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1] |
| 514 : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; | 517 : WebRtcAec_kNormalSmoothingCoefficients[mult - 1]; |
| 515 int i; | 518 int i; |
| 516 float sdSum = 0, seSum = 0; | 519 float sdSum = 0, seSum = 0; |
| 517 const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); | 520 const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); |
| 518 float32x4_t vec_sdSum = vdupq_n_f32(0.0f); | 521 float32x4_t vec_sdSum = vdupq_n_f32(0.0f); |
| 519 float32x4_t vec_seSum = vdupq_n_f32(0.0f); | 522 float32x4_t vec_seSum = vdupq_n_f32(0.0f); |
| 520 | 523 |
| 521 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 524 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 522 const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); | 525 const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); |
| 523 const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); | 526 const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); |
| 524 const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); | 527 const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); |
| 525 const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); | 528 const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); |
| 526 const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); | 529 const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); |
| 527 const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); | 530 const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); |
| 528 float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]); | 531 float32x4_t vec_sd = |
| 529 float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]); | 532 vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]); |
| 530 float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]); | 533 float32x4_t vec_se = |
| 534 vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]); |
| 535 float32x4_t vec_sx = |
| 536 vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]); |
| 531 float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0); | 537 float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0); |
| 532 float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); | 538 float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); |
| 533 float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); | 539 float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); |
| 534 | 540 |
| 535 vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); | 541 vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); |
| 536 vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); | 542 vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); |
| 537 vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); | 543 vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); |
| 538 vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); | 544 vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); |
| 539 vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); | 545 vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); |
| 540 vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); | 546 vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); |
| 541 vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); | 547 vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); |
| 542 | 548 |
| 543 vst1q_f32(&aec->sd[i], vec_sd); | 549 vst1q_f32(&coherence_state->sd[i], vec_sd); |
| 544 vst1q_f32(&aec->se[i], vec_se); | 550 vst1q_f32(&coherence_state->se[i], vec_se); |
| 545 vst1q_f32(&aec->sx[i], vec_sx); | 551 vst1q_f32(&coherence_state->sx[i], vec_sx); |
| 546 | 552 |
| 547 { | 553 { |
| 548 float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); | 554 float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]); |
| 549 float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); | 555 float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); |
| 550 float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); | 556 float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); |
| 551 vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); | 557 vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); |
| 552 vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); | 558 vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); |
| 553 vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); | 559 vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); |
| 554 vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); | 560 vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); |
| 555 vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); | 561 vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); |
| 556 vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); | 562 vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); |
| 557 vst2q_f32(&aec->sde[i][0], vec_sde); | 563 vst2q_f32(&coherence_state->sde[i][0], vec_sde); |
| 558 } | 564 } |
| 559 | 565 |
| 560 { | 566 { |
| 561 float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); | 567 float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]); |
| 562 float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); | 568 float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); |
| 563 float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); | 569 float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); |
| 564 vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); | 570 vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); |
| 565 vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); | 571 vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); |
| 566 vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); | 572 vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); |
| 567 vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); | 573 vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); |
| 568 vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); | 574 vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); |
| 569 vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); | 575 vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); |
| 570 vst2q_f32(&aec->sxd[i][0], vec_sxd); | 576 vst2q_f32(&coherence_state->sxd[i][0], vec_sxd); |
| 571 } | 577 } |
| 572 | 578 |
| 573 vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); | 579 vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); |
| 574 vec_seSum = vaddq_f32(vec_seSum, vec_se); | 580 vec_seSum = vaddq_f32(vec_seSum, vec_se); |
| 575 } | 581 } |
| 576 { | 582 { |
| 577 float32x2_t vec_sdSum_total; | 583 float32x2_t vec_sdSum_total; |
| 578 float32x2_t vec_seSum_total; | 584 float32x2_t vec_seSum_total; |
| 579 // A B C D | 585 // A B C D |
| 580 vec_sdSum_total = | 586 vec_sdSum_total = |
| 581 vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum)); | 587 vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum)); |
| 582 vec_seSum_total = | 588 vec_seSum_total = |
| 583 vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum)); | 589 vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum)); |
| 584 // A+B C+D | 590 // A+B C+D |
| 585 vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); | 591 vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); |
| 586 vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); | 592 vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); |
| 587 // A+B+C+D A+B+C+D | 593 // A+B+C+D A+B+C+D |
| 588 sdSum = vget_lane_f32(vec_sdSum_total, 0); | 594 sdSum = vget_lane_f32(vec_sdSum_total, 0); |
| 589 seSum = vget_lane_f32(vec_seSum_total, 0); | 595 seSum = vget_lane_f32(vec_seSum_total, 0); |
| 590 } | 596 } |
| 591 | 597 |
| 592 // scalar code for the remaining items. | 598 // scalar code for the remaining items. |
| 593 for (; i < PART_LEN1; i++) { | 599 for (; i < PART_LEN1; i++) { |
| 594 aec->sd[i] = ptrGCoh[0] * aec->sd[i] + | 600 coherence_state->sd[i] = |
| 595 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); | 601 ptrGCoh[0] * coherence_state->sd[i] + |
| 596 aec->se[i] = ptrGCoh[0] * aec->se[i] + | 602 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); |
| 597 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); | 603 coherence_state->se[i] = |
| 604 ptrGCoh[0] * coherence_state->se[i] + |
| 605 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); |
| 598 // We threshold here to protect against the ill-effects of a zero farend. | 606 // We threshold here to protect against the ill-effects of a zero farend. |
| 599 // The threshold is not arbitrarily chosen, but balances protection and | 607 // The threshold is not arbitrarily chosen, but balances protection and |
| 600 // adverse interaction with the algorithm's tuning. | 608 // adverse interaction with the algorithm's tuning. |
| 601 // TODO(bjornv): investigate further why this is so sensitive. | 609 // TODO(bjornv): investigate further why this is so sensitive. |
| 602 aec->sx[i] = ptrGCoh[0] * aec->sx[i] + | 610 coherence_state->sx[i] = |
| 603 ptrGCoh[1] * WEBRTC_SPL_MAX( | 611 ptrGCoh[0] * coherence_state->sx[i] + |
| 604 xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], | 612 ptrGCoh[1] * |
| 605 WebRtcAec_kMinFarendPSD); | 613 WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], |
| 614 WebRtcAec_kMinFarendPSD); |
| 606 | 615 |
| 607 aec->sde[i][0] = | 616 coherence_state->sde[i][0] = |
| 608 ptrGCoh[0] * aec->sde[i][0] + | 617 ptrGCoh[0] * coherence_state->sde[i][0] + |
| 609 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); | 618 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); |
| 610 aec->sde[i][1] = | 619 coherence_state->sde[i][1] = |
| 611 ptrGCoh[0] * aec->sde[i][1] + | 620 ptrGCoh[0] * coherence_state->sde[i][1] + |
| 612 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); | 621 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); |
| 613 | 622 |
| 614 aec->sxd[i][0] = | 623 coherence_state->sxd[i][0] = |
| 615 ptrGCoh[0] * aec->sxd[i][0] + | 624 ptrGCoh[0] * coherence_state->sxd[i][0] + |
| 616 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); | 625 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); |
| 617 aec->sxd[i][1] = | 626 coherence_state->sxd[i][1] = |
| 618 ptrGCoh[0] * aec->sxd[i][1] + | 627 ptrGCoh[0] * coherence_state->sxd[i][1] + |
| 619 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); | 628 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); |
| 620 | 629 |
| 621 sdSum += aec->sd[i]; | 630 sdSum += coherence_state->sd[i]; |
| 622 seSum += aec->se[i]; | 631 seSum += coherence_state->se[i]; |
| 623 } | 632 } |
| 624 | 633 |
| 625 // Divergent filter safeguard update. | 634 // Divergent filter safeguard update. |
| 626 aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; | 635 *filter_divergence_state = |
| 636 (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum; |
| 627 | 637 |
| 628 // Signal extreme filter divergence if the error is significantly larger | 638 // Signal extreme filter divergence if the error is significantly larger |
| 629 // than the nearend (13 dB). | 639 // than the nearend (13 dB). |
| 630 *extreme_filter_divergence = (seSum > (19.95f * sdSum)); | 640 *extreme_filter_divergence = (seSum > (19.95f * sdSum)); |
| 631 } | 641 } |
| 632 | 642 |
| 633 // Window time domain data to be used by the fft. | 643 // Window time domain data to be used by the fft. |
| 634 static void WindowDataNEON(float* x_windowed, const float* x) { | 644 static void WindowDataNEON(float* x_windowed, const float* x) { |
| 635 int i; | 645 int i; |
| 636 for (i = 0; i < PART_LEN; i += 4) { | 646 for (i = 0; i < PART_LEN; i += 4) { |
| (...skipping 23 matching lines...) Expand all Loading... |
| 660 vst1q_f32(&data_complex[0][i], vec_data.val[0]); | 670 vst1q_f32(&data_complex[0][i], vec_data.val[0]); |
| 661 vst1q_f32(&data_complex[1][i], vec_data.val[1]); | 671 vst1q_f32(&data_complex[1][i], vec_data.val[1]); |
| 662 } | 672 } |
| 663 // fix beginning/end values | 673 // fix beginning/end values |
| 664 data_complex[1][0] = 0; | 674 data_complex[1][0] = 0; |
| 665 data_complex[1][PART_LEN] = 0; | 675 data_complex[1][PART_LEN] = 0; |
| 666 data_complex[0][0] = data[0]; | 676 data_complex[0][0] = data[0]; |
| 667 data_complex[0][PART_LEN] = data[1]; | 677 data_complex[0][PART_LEN] = data[1]; |
| 668 } | 678 } |
| 669 | 679 |
| 670 static void SubbandCoherenceNEON(AecCore* aec, | 680 static void SubbandCoherenceNEON(int mult, |
| 681 bool extended_filter_enabled, |
| 671 float efw[2][PART_LEN1], | 682 float efw[2][PART_LEN1], |
| 672 float dfw[2][PART_LEN1], | 683 float dfw[2][PART_LEN1], |
| 673 float xfw[2][PART_LEN1], | 684 float xfw[2][PART_LEN1], |
| 674 float* fft, | 685 float* fft, |
| 675 float* cohde, | 686 float* cohde, |
| 676 float* cohxd, | 687 float* cohxd, |
| 688 CoherenceState* coherence_state, |
| 689 short* filter_divergence_state, |
| 677 int* extreme_filter_divergence) { | 690 int* extreme_filter_divergence) { |
| 678 int i; | 691 int i; |
| 679 | 692 |
| 680 SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); | 693 SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state, |
| 694 filter_divergence_state, extreme_filter_divergence); |
| 681 | 695 |
| 682 { | 696 { |
| 683 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); | 697 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); |
| 684 | 698 |
| 685 // Subband coherence | 699 // Subband coherence |
| 686 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 700 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 687 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); | 701 const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]); |
| 688 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); | 702 const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]); |
| 689 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); | 703 const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]); |
| 690 const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); | 704 const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); |
| 691 const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); | 705 const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); |
| 692 float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); | 706 float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]); |
| 693 float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); | 707 float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]); |
| 694 float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); | 708 float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); |
| 695 float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); | 709 float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); |
| 696 vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); | 710 vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); |
| 697 vec_cohde = vdivq_f32(vec_cohde, vec_sdse); | 711 vec_cohde = vdivq_f32(vec_cohde, vec_sdse); |
| 698 vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); | 712 vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); |
| 699 vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); | 713 vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); |
| 700 | 714 |
| 701 vst1q_f32(&cohde[i], vec_cohde); | 715 vst1q_f32(&cohde[i], vec_cohde); |
| 702 vst1q_f32(&cohxd[i], vec_cohxd); | 716 vst1q_f32(&cohxd[i], vec_cohxd); |
| 703 } | 717 } |
| 704 } | 718 } |
| 705 // scalar code for the remaining items. | 719 // scalar code for the remaining items. |
| 706 for (; i < PART_LEN1; i++) { | 720 for (; i < PART_LEN1; i++) { |
| 707 cohde[i] = | 721 cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] + |
| 708 (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / | 722 coherence_state->sde[i][1] * coherence_state->sde[i][1]) / |
| 709 (aec->sd[i] * aec->se[i] + 1e-10f); | 723 (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f); |
| 710 cohxd[i] = | 724 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + |
| 711 (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / | 725 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / |
| 712 (aec->sx[i] * aec->sd[i] + 1e-10f); | 726 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); |
| 713 } | 727 } |
| 714 } | 728 } |
| 715 | 729 |
| 716 void WebRtcAec_InitAec_neon(void) { | 730 void WebRtcAec_InitAec_neon(void) { |
| 717 WebRtcAec_FilterFar = FilterFarNEON; | 731 WebRtcAec_FilterFar = FilterFarNEON; |
| 718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
| 719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
| 720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
| 721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
| 722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
| 723 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 737 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
| 724 WebRtcAec_WindowData = WindowDataNEON; | 738 WebRtcAec_WindowData = WindowDataNEON; |
| 725 } | 739 } |
| 726 } // namespace webrtc | 740 } // namespace webrtc |
| OLD | NEW |