Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 1936173002: Changed the AEC SubbandCoherence function to not use the full aec state (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec1_CL
Patch Set: Fixed bad merge Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 484 matching lines...) Expand 10 before | Expand all | Expand 10 after
495 495
496 // Updates the following smoothed Power Spectral Densities (PSD): 496 // Updates the following smoothed Power Spectral Densities (PSD):
497 // - sd : near-end 497 // - sd : near-end
498 // - se : residual echo 498 // - se : residual echo
499 // - sx : far-end 499 // - sx : far-end
500 // - sde : cross-PSD of near-end and residual echo 500 // - sde : cross-PSD of near-end and residual echo
501 // - sxd : cross-PSD of near-end and far-end 501 // - sxd : cross-PSD of near-end and far-end
502 // 502 //
503 // In addition to updating the PSDs, the filter divergence state is also 503 // In addition to updating the PSDs, the filter divergence state is also
504 // determined, upon which actions are taken. 504 // determined, upon which actions are taken.
505 static void SmoothedPSD(AecCore* aec, 505 static void SmoothedPSD(int mult,
506 bool extended_filter_enabled,
506 float efw[2][PART_LEN1], 507 float efw[2][PART_LEN1],
507 float dfw[2][PART_LEN1], 508 float dfw[2][PART_LEN1],
508 float xfw[2][PART_LEN1], 509 float xfw[2][PART_LEN1],
510 CoherenceState* coherence_state,
511 short* filter_divergence_state,
509 int* extreme_filter_divergence) { 512 int* extreme_filter_divergence) {
510 // Power estimate smoothing coefficients. 513 // Power estimate smoothing coefficients.
511 const float* ptrGCoh = 514 const float* ptrGCoh =
512 aec->extended_filter_enabled 515 extended_filter_enabled
513 ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] 516 ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
514 : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; 517 : WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
515 int i; 518 int i;
516 float sdSum = 0, seSum = 0; 519 float sdSum = 0, seSum = 0;
517 const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); 520 const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
518 float32x4_t vec_sdSum = vdupq_n_f32(0.0f); 521 float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
519 float32x4_t vec_seSum = vdupq_n_f32(0.0f); 522 float32x4_t vec_seSum = vdupq_n_f32(0.0f);
520 523
521 for (i = 0; i + 3 < PART_LEN1; i += 4) { 524 for (i = 0; i + 3 < PART_LEN1; i += 4) {
522 const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); 525 const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
523 const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); 526 const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
524 const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); 527 const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
525 const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); 528 const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
526 const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); 529 const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
527 const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); 530 const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
528 float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]); 531 float32x4_t vec_sd =
529 float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]); 532 vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]);
530 float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]); 533 float32x4_t vec_se =
534 vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]);
535 float32x4_t vec_sx =
536 vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]);
531 float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0); 537 float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
532 float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); 538 float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
533 float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); 539 float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
534 540
535 vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); 541 vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
536 vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); 542 vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
537 vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); 543 vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
538 vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); 544 vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
539 vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); 545 vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
540 vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); 546 vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
541 vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); 547 vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
542 548
543 vst1q_f32(&aec->sd[i], vec_sd); 549 vst1q_f32(&coherence_state->sd[i], vec_sd);
544 vst1q_f32(&aec->se[i], vec_se); 550 vst1q_f32(&coherence_state->se[i], vec_se);
545 vst1q_f32(&aec->sx[i], vec_sx); 551 vst1q_f32(&coherence_state->sx[i], vec_sx);
546 552
547 { 553 {
548 float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); 554 float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
549 float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); 555 float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
550 float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); 556 float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
551 vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); 557 vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
552 vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); 558 vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
553 vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); 559 vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
554 vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); 560 vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
555 vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); 561 vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
556 vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); 562 vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
557 vst2q_f32(&aec->sde[i][0], vec_sde); 563 vst2q_f32(&coherence_state->sde[i][0], vec_sde);
558 } 564 }
559 565
560 { 566 {
561 float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); 567 float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
562 float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); 568 float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
563 float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); 569 float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
564 vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); 570 vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
565 vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); 571 vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
566 vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); 572 vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
567 vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); 573 vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
568 vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); 574 vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
569 vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); 575 vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
570 vst2q_f32(&aec->sxd[i][0], vec_sxd); 576 vst2q_f32(&coherence_state->sxd[i][0], vec_sxd);
571 } 577 }
572 578
573 vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); 579 vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
574 vec_seSum = vaddq_f32(vec_seSum, vec_se); 580 vec_seSum = vaddq_f32(vec_seSum, vec_se);
575 } 581 }
576 { 582 {
577 float32x2_t vec_sdSum_total; 583 float32x2_t vec_sdSum_total;
578 float32x2_t vec_seSum_total; 584 float32x2_t vec_seSum_total;
579 // A B C D 585 // A B C D
580 vec_sdSum_total = 586 vec_sdSum_total =
581 vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum)); 587 vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum));
582 vec_seSum_total = 588 vec_seSum_total =
583 vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum)); 589 vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum));
584 // A+B C+D 590 // A+B C+D
585 vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); 591 vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
586 vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); 592 vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
587 // A+B+C+D A+B+C+D 593 // A+B+C+D A+B+C+D
588 sdSum = vget_lane_f32(vec_sdSum_total, 0); 594 sdSum = vget_lane_f32(vec_sdSum_total, 0);
589 seSum = vget_lane_f32(vec_seSum_total, 0); 595 seSum = vget_lane_f32(vec_seSum_total, 0);
590 } 596 }
591 597
592 // scalar code for the remaining items. 598 // scalar code for the remaining items.
593 for (; i < PART_LEN1; i++) { 599 for (; i < PART_LEN1; i++) {
594 aec->sd[i] = ptrGCoh[0] * aec->sd[i] + 600 coherence_state->sd[i] =
595 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); 601 ptrGCoh[0] * coherence_state->sd[i] +
596 aec->se[i] = ptrGCoh[0] * aec->se[i] + 602 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
597 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); 603 coherence_state->se[i] =
604 ptrGCoh[0] * coherence_state->se[i] +
605 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
598 // We threshold here to protect against the ill-effects of a zero farend. 606 // We threshold here to protect against the ill-effects of a zero farend.
599 // The threshold is not arbitrarily chosen, but balances protection and 607 // The threshold is not arbitrarily chosen, but balances protection and
600 // adverse interaction with the algorithm's tuning. 608 // adverse interaction with the algorithm's tuning.
601 // TODO(bjornv): investigate further why this is so sensitive. 609 // TODO(bjornv): investigate further why this is so sensitive.
602 aec->sx[i] = ptrGCoh[0] * aec->sx[i] + 610 coherence_state->sx[i] =
603 ptrGCoh[1] * WEBRTC_SPL_MAX( 611 ptrGCoh[0] * coherence_state->sx[i] +
604 xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 612 ptrGCoh[1] *
605 WebRtcAec_kMinFarendPSD); 613 WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
614 WebRtcAec_kMinFarendPSD);
606 615
607 aec->sde[i][0] = 616 coherence_state->sde[i][0] =
608 ptrGCoh[0] * aec->sde[i][0] + 617 ptrGCoh[0] * coherence_state->sde[i][0] +
609 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); 618 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
610 aec->sde[i][1] = 619 coherence_state->sde[i][1] =
611 ptrGCoh[0] * aec->sde[i][1] + 620 ptrGCoh[0] * coherence_state->sde[i][1] +
612 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); 621 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
613 622
614 aec->sxd[i][0] = 623 coherence_state->sxd[i][0] =
615 ptrGCoh[0] * aec->sxd[i][0] + 624 ptrGCoh[0] * coherence_state->sxd[i][0] +
616 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); 625 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
617 aec->sxd[i][1] = 626 coherence_state->sxd[i][1] =
618 ptrGCoh[0] * aec->sxd[i][1] + 627 ptrGCoh[0] * coherence_state->sxd[i][1] +
619 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); 628 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
620 629
621 sdSum += aec->sd[i]; 630 sdSum += coherence_state->sd[i];
622 seSum += aec->se[i]; 631 seSum += coherence_state->se[i];
623 } 632 }
624 633
625 // Divergent filter safeguard update. 634 // Divergent filter safeguard update.
626 aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; 635 *filter_divergence_state =
636 (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;
627 637
628 // Signal extreme filter divergence if the error is significantly larger 638 // Signal extreme filter divergence if the error is significantly larger
629 // than the nearend (13 dB). 639 // than the nearend (13 dB).
630 *extreme_filter_divergence = (seSum > (19.95f * sdSum)); 640 *extreme_filter_divergence = (seSum > (19.95f * sdSum));
631 } 641 }
632 642
633 // Window time domain data to be used by the fft. 643 // Window time domain data to be used by the fft.
634 static void WindowDataNEON(float* x_windowed, const float* x) { 644 static void WindowDataNEON(float* x_windowed, const float* x) {
635 int i; 645 int i;
636 for (i = 0; i < PART_LEN; i += 4) { 646 for (i = 0; i < PART_LEN; i += 4) {
(...skipping 23 matching lines...) Expand all
660 vst1q_f32(&data_complex[0][i], vec_data.val[0]); 670 vst1q_f32(&data_complex[0][i], vec_data.val[0]);
661 vst1q_f32(&data_complex[1][i], vec_data.val[1]); 671 vst1q_f32(&data_complex[1][i], vec_data.val[1]);
662 } 672 }
663 // fix beginning/end values 673 // fix beginning/end values
664 data_complex[1][0] = 0; 674 data_complex[1][0] = 0;
665 data_complex[1][PART_LEN] = 0; 675 data_complex[1][PART_LEN] = 0;
666 data_complex[0][0] = data[0]; 676 data_complex[0][0] = data[0];
667 data_complex[0][PART_LEN] = data[1]; 677 data_complex[0][PART_LEN] = data[1];
668 } 678 }
669 679
670 static void SubbandCoherenceNEON(AecCore* aec, 680 static void SubbandCoherenceNEON(int mult,
681 bool extended_filter_enabled,
671 float efw[2][PART_LEN1], 682 float efw[2][PART_LEN1],
672 float dfw[2][PART_LEN1], 683 float dfw[2][PART_LEN1],
673 float xfw[2][PART_LEN1], 684 float xfw[2][PART_LEN1],
674 float* fft, 685 float* fft,
675 float* cohde, 686 float* cohde,
676 float* cohxd, 687 float* cohxd,
688 CoherenceState* coherence_state,
689 short* filter_divergence_state,
677 int* extreme_filter_divergence) { 690 int* extreme_filter_divergence) {
678 int i; 691 int i;
679 692
680 SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); 693 SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
694 filter_divergence_state, extreme_filter_divergence);
681 695
682 { 696 {
683 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); 697 const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);
684 698
685 // Subband coherence 699 // Subband coherence
686 for (i = 0; i + 3 < PART_LEN1; i += 4) { 700 for (i = 0; i + 3 < PART_LEN1; i += 4) {
687 const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); 701 const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]);
688 const float32x4_t vec_se = vld1q_f32(&aec->se[i]); 702 const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]);
689 const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); 703 const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]);
690 const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); 704 const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
691 const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); 705 const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
692 float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); 706 float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
693 float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); 707 float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
694 float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); 708 float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
695 float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); 709 float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
696 vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); 710 vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
697 vec_cohde = vdivq_f32(vec_cohde, vec_sdse); 711 vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
698 vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); 712 vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
699 vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); 713 vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
700 714
701 vst1q_f32(&cohde[i], vec_cohde); 715 vst1q_f32(&cohde[i], vec_cohde);
702 vst1q_f32(&cohxd[i], vec_cohxd); 716 vst1q_f32(&cohxd[i], vec_cohxd);
703 } 717 }
704 } 718 }
705 // scalar code for the remaining items. 719 // scalar code for the remaining items.
706 for (; i < PART_LEN1; i++) { 720 for (; i < PART_LEN1; i++) {
707 cohde[i] = 721 cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
708 (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / 722 coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
709 (aec->sd[i] * aec->se[i] + 1e-10f); 723 (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
710 cohxd[i] = 724 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
711 (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / 725 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
712 (aec->sx[i] * aec->sd[i] + 1e-10f); 726 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
713 } 727 }
714 } 728 }
715 729
716 void WebRtcAec_InitAec_neon(void) { 730 void WebRtcAec_InitAec_neon(void) {
717 WebRtcAec_FilterFar = FilterFarNEON; 731 WebRtcAec_FilterFar = FilterFarNEON;
718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; 735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; 736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
723 WebRtcAec_PartitionDelay = PartitionDelayNEON; 737 WebRtcAec_PartitionDelay = PartitionDelayNEON;
724 WebRtcAec_WindowData = WindowDataNEON; 738 WebRtcAec_WindowData = WindowDataNEON;
725 } 739 }
726 } // namespace webrtc 740 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_internal.h ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698