OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 430 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
441 // Suppress error signal | 441 // Suppress error signal |
442 efw[0][i] *= hNl[i]; | 442 efw[0][i] *= hNl[i]; |
443 efw[1][i] *= hNl[i]; | 443 efw[1][i] *= hNl[i]; |
444 | 444 |
445 // Ooura fft returns incorrect sign on imaginary component. It matters | 445 // Ooura fft returns incorrect sign on imaginary component. It matters |
446 // here because we are making an additive change with comfort noise. | 446 // here because we are making an additive change with comfort noise. |
447 efw[1][i] *= -1; | 447 efw[1][i] *= -1; |
448 } | 448 } |
449 } | 449 } |
450 | 450 |
// PartitionDelayNEON -- shown here as a two-column diff: the left column is
// the OLD revision and the right column is the NEW revision.
//
// What changes between the revisions: the OLD signature takes a single
// `const AecCore* aec` and reads `aec->num_partitions` and `aec->wfBuf`; the
// NEW signature takes `num_partitions` and `h_fft_buf` directly, and every
// `aec->wfBuf[...]` read becomes an `h_fft_buf[...]` read. The loop structure
// and arithmetic are otherwise the same in both columns.
//
// What the function does (both revisions): for each partition index i in
// [0, num_partitions) it sums buf[0][k]^2 + buf[1][k]^2 over the PART_LEN1
// elements starting at offset i * PART_LEN1, and returns the index of the
// partition with the largest sum.
//
// Params (NEW revision):
//   num_partitions - number of partitions to scan.
//   h_fft_buf      - two-row float buffer; rows [0] and [1] are presumably
//                    the real and imaginary parts of the filter spectrum
//                    (TODO confirm against the caller). Only read here.
// Returns: the index of the first partition whose energy is strictly greater
//   than all earlier ones; 0 if no partition has energy above 0 (including
//   the case where the loop does not execute).
451 static int PartitionDelayNEON(const AecCore* aec) { | 451 static int PartitionDelayNEON( |
| 452 int num_partitions, |
| 453 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { |
452 // Measures the energy in each filter partition and returns the partition with | 454 // Measures the energy in each filter partition and returns the partition with |
453 // highest energy. | 455 // highest energy. |
454 // TODO(bjornv): Spread computational cost by computing one partition per | 456 // TODO(bjornv): Spread computational cost by computing one partition per |
455 // block? | 457 // block? |
456 float wfEnMax = 0; | 458 float wfEnMax = 0; |
457 int i; | 459 int i; |
458 int delay = 0; | 460 int delay = 0; |
459 | 461 |
460 for (i = 0; i < aec->num_partitions; i++) { | 462 for (i = 0; i < num_partitions; i++) { |
461 int j; | 463 int j; |
// pos is the offset of partition i's first element within each buffer row.
462 int pos = i * PART_LEN1; | 464 int pos = i * PART_LEN1; |
463 float wfEn = 0; | 465 float wfEn = 0; |
464 float32x4_t vec_wfEn = vdupq_n_f32(0.0f); | 466 float32x4_t vec_wfEn = vdupq_n_f32(0.0f); |
465 // vectorized code (four at once) | 467 // vectorized code (four at once) |
// The `j + 3 < PART_LEN1` bound keeps every 4-float load inside the current
// partition; any leftover elements are handled by the scalar loop below.
466 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 468 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
467 const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]); | 469 const float32x4_t vec_wfBuf0 = vld1q_f32(&h_fft_buf[0][pos + j]); |
468 const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]); | 470 const float32x4_t vec_wfBuf1 = vld1q_f32(&h_fft_buf[1][pos + j]); |
// Accumulate the per-lane squares of both rows into vec_wfEn.
469 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); | 471 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); |
470 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); | 472 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); |
471 } | 473 } |
// Horizontal reduction: two pairwise adds fold the four accumulator lanes
// into a single total, which is then read from lane 0 into wfEn.
472 { | 474 { |
473 float32x2_t vec_total; | 475 float32x2_t vec_total; |
474 // A B C D | 476 // A B C D |
475 vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); | 477 vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); |
476 // A+B C+D | 478 // A+B C+D |
477 vec_total = vpadd_f32(vec_total, vec_total); | 479 vec_total = vpadd_f32(vec_total, vec_total); |
478 // A+B+C+D A+B+C+D | 480 // A+B+C+D A+B+C+D |
479 wfEn = vget_lane_f32(vec_total, 0); | 481 wfEn = vget_lane_f32(vec_total, 0); |
480 } | 482 } |
481 | 483 |
482 // scalar code for the remaining items. | 484 // scalar code for the remaining items. |
// j continues from where the vector loop stopped, so no element is counted
// twice.
483 for (; j < PART_LEN1; j++) { | 485 for (; j < PART_LEN1; j++) { |
484 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + | 486 wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + |
485 aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; | 487 h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; |
486 } | 488 } |
487 | 489 |
// Strict '>' means that on equal energies the earliest partition is kept.
488 if (wfEn > wfEnMax) { | 490 if (wfEn > wfEnMax) { |
489 wfEnMax = wfEn; | 491 wfEnMax = wfEn; |
490 delay = i; | 492 delay = i; |
491 } | 493 } |
492 } | 494 } |
493 return delay; | 495 return delay; |
494 } | 496 } |
495 | 497 |
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
731 WebRtcAec_FilterFar = FilterFarNEON; | 733 WebRtcAec_FilterFar = FilterFarNEON; |
732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 734 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 735 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 736 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 737 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 738 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
737 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 739 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
738 WebRtcAec_WindowData = WindowDataNEON; | 740 WebRtcAec_WindowData = WindowDataNEON; |
739 } | 741 } |
740 } // namespace webrtc | 742 } // namespace webrtc |
OLD | NEW |