OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
495 | 495 |
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined and reported through |filter_divergence_state| and
// |extreme_filter_divergence|.
//
// |mult| selects the smoothing-coefficient row for the current sample-rate
// multiplier; |extended_filter_enabled| chooses between the extended and
// normal coefficient tables.
static void SmoothedPSD(int mult,
                        bool extended_filter_enabled,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1],
                        CoherenceState* coherence_state,
                        short* filter_divergence_state,
                        int* extreme_filter_divergence) {
  // Power estimate smoothing coefficients.
  const float* ptrGCoh =
      extended_filter_enabled
          ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
          : WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
  int i;
  float sdSum = 0, seSum = 0;
  const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
  float32x4_t vec_seSum = vdupq_n_f32(0.0f);

  // Vectorized loop: four frequency bins per iteration.
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
    // Start each smoothed PSD as gCoh[0] * previous value; the new
    // instantaneous power is blended in below with gCoh[1].
    float32x4_t vec_sd =
        vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]);
    float32x4_t vec_se =
        vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]);
    float32x4_t vec_sx =
        vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]);
    // Magnitude squared per bin: real^2 + imag^2.
    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);

    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
    // Clamp far-end power to WebRtcAec_kMinFarendPSD (see the comment in the
    // scalar tail loop for the rationale).
    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);

    vst1q_f32(&coherence_state->sd[i], vec_sd);
    vst1q_f32(&coherence_state->se[i], vec_se);
    vst1q_f32(&coherence_state->sx[i], vec_sx);

    // Cross-PSD of near-end and residual echo: d * conj(e), smoothed.
    // vld2q/vst2q de-interleave the [real, imag] pairs into two registers.
    {
      float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
      vst2q_f32(&coherence_state->sde[i][0], vec_sde);
    }

    // Cross-PSD of near-end and far-end: d * conj(x), smoothed.
    {
      float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
      vst2q_f32(&coherence_state->sxd[i][0], vec_sxd);
    }

    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
    vec_seSum = vaddq_f32(vec_seSum, vec_se);
  }
  // Horizontal reduction of the four-lane accumulators into scalars.
  {
    float32x2_t vec_sdSum_total;
    float32x2_t vec_seSum_total;
    // A B C D
    vec_sdSum_total =
        vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum));
    vec_seSum_total =
        vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum));
    // A+B C+D
    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
    // A+B+C+D A+B+C+D
    sdSum = vget_lane_f32(vec_sdSum_total, 0);
    seSum = vget_lane_f32(vec_seSum_total, 0);
  }

  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    coherence_state->sd[i] =
        ptrGCoh[0] * coherence_state->sd[i] +
        ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    coherence_state->se[i] =
        ptrGCoh[0] * coherence_state->se[i] +
        ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    coherence_state->sx[i] =
        ptrGCoh[0] * coherence_state->sx[i] +
        ptrGCoh[1] *
            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
                           WebRtcAec_kMinFarendPSD);

    coherence_state->sde[i][0] =
        ptrGCoh[0] * coherence_state->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    coherence_state->sde[i][1] =
        ptrGCoh[0] * coherence_state->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    coherence_state->sxd[i][0] =
        ptrGCoh[0] * coherence_state->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    coherence_state->sxd[i][1] =
        ptrGCoh[0] * coherence_state->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += coherence_state->sd[i];
    seSum += coherence_state->se[i];
  }

  // Divergent filter safeguard update. Hysteresis: once diverged, the echo
  // power must drop ~5% below the near-end power before the state clears.
  *filter_divergence_state =
      (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;

  // Signal extreme filter divergence if the error is significantly larger
  // than the nearend (13 dB).
  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
}
632 | 642 |
633 // Window time domain data to be used by the fft. | 643 // Window time domain data to be used by the fft. |
634 static void WindowDataNEON(float* x_windowed, const float* x) { | 644 static void WindowDataNEON(float* x_windowed, const float* x) { |
635 int i; | 645 int i; |
636 for (i = 0; i < PART_LEN; i += 4) { | 646 for (i = 0; i < PART_LEN; i += 4) { |
(...skipping 23 matching lines...) Expand all Loading... |
660 vst1q_f32(&data_complex[0][i], vec_data.val[0]); | 670 vst1q_f32(&data_complex[0][i], vec_data.val[0]); |
661 vst1q_f32(&data_complex[1][i], vec_data.val[1]); | 671 vst1q_f32(&data_complex[1][i], vec_data.val[1]); |
662 } | 672 } |
663 // fix beginning/end values | 673 // fix beginning/end values |
664 data_complex[1][0] = 0; | 674 data_complex[1][0] = 0; |
665 data_complex[1][PART_LEN] = 0; | 675 data_complex[1][PART_LEN] = 0; |
666 data_complex[0][0] = data[0]; | 676 data_complex[0][0] = data[0]; |
667 data_complex[0][PART_LEN] = data[1]; | 677 data_complex[0][PART_LEN] = data[1]; |
668 } | 678 } |
669 | 679 |
// Computes the subband coherence between near-end and residual echo
// (|cohde|) and between near-end and far-end (|cohxd|), one value per
// frequency bin. First refreshes the smoothed (cross-)PSDs via SmoothedPSD
// (which also updates |filter_divergence_state| and
// |extreme_filter_divergence|), then evaluates
//   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10)
//   cohxd[i] = |sxd[i]|^2 / (sx[i] * sd[i] + 1e-10)
// The 1e-10 bias guards against division by zero.
// NOTE(review): |fft| is accepted but not used in this NEON variant —
// presumably kept for signature compatibility with the generic dispatch
// pointer; confirm against WebRtcAec_SubbandCoherence's other backends.
static void SubbandCoherenceNEON(int mult,
                                 bool extended_filter_enabled,
                                 float efw[2][PART_LEN1],
                                 float dfw[2][PART_LEN1],
                                 float xfw[2][PART_LEN1],
                                 float* fft,
                                 float* cohde,
                                 float* cohxd,
                                 CoherenceState* coherence_state,
                                 short* filter_divergence_state,
                                 int* extreme_filter_divergence) {
  int i;

  SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
              filter_divergence_state, extreme_filter_divergence);

  {
    const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);

    // Subband coherence, four bins per iteration.
    for (i = 0; i + 3 < PART_LEN1; i += 4) {
      const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]);
      const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]);
      const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]);
      // Denominators with the 1e-10 bias folded in via fused multiply-add.
      const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
      const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
      // De-interleave [real, imag] cross-PSD pairs.
      float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
      float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
      // |sde|^2 and |sxd|^2: real^2 + imag^2.
      float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
      float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
      vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
      vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
      vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
      vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);

      vst1q_f32(&cohde[i], vec_cohde);
      vst1q_f32(&cohxd[i], vec_cohxd);
    }
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
                coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
               (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
    cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
                coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
               (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
  }
}
715 | 729 |
716 void WebRtcAec_InitAec_neon(void) { | 730 void WebRtcAec_InitAec_neon(void) { |
717 WebRtcAec_FilterFar = FilterFarNEON; | 731 WebRtcAec_FilterFar = FilterFarNEON; |
718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
723 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 737 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
724 WebRtcAec_WindowData = WindowDataNEON; | 738 WebRtcAec_WindowData = WindowDataNEON; |
725 } | 739 } |
726 } // namespace webrtc | 740 } // namespace webrtc |
OLD | NEW |