| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 430 matching lines...) |
| 441 // Suppress error signal | 441 // Suppress error signal |
| 442 efw[0][i] *= hNl[i]; | 442 efw[0][i] *= hNl[i]; |
| 443 efw[1][i] *= hNl[i]; | 443 efw[1][i] *= hNl[i]; |
| 444 | 444 |
| 445 // Ooura fft returns incorrect sign on imaginary component. It matters | 445 // Ooura fft returns incorrect sign on imaginary component. It matters |
| 446 // here because we are making an additive change with comfort noise. | 446 // here because we are making an additive change with comfort noise. |
| 447 efw[1][i] *= -1; | 447 efw[1][i] *= -1; |
| 448 } | 448 } |
| 449 } | 449 } |
| 450 | 450 |
| 451 static int PartitionDelayNEON(const AecCore* aec) { | 451 static int PartitionDelayNEON( |
| 452 int num_partitions, |
| 453 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { |
| 452 // Measures the energy in each filter partition and returns the partition with | 454 // Measures the energy in each filter partition and returns the partition with |
| 453 // highest energy. | 455 // highest energy. |
| 454 // TODO(bjornv): Spread computational cost by computing one partition per | 456 // TODO(bjornv): Spread computational cost by computing one partition per |
| 455 // block? | 457 // block? |
| 456 float wfEnMax = 0; | 458 float wfEnMax = 0; |
| 457 int i; | 459 int i; |
| 458 int delay = 0; | 460 int delay = 0; |
| 459 | 461 |
| 460 for (i = 0; i < aec->num_partitions; i++) { | 462 for (i = 0; i < num_partitions; i++) { |
| 461 int j; | 463 int j; |
| 462 int pos = i * PART_LEN1; | 464 int pos = i * PART_LEN1; |
| 463 float wfEn = 0; | 465 float wfEn = 0; |
| 464 float32x4_t vec_wfEn = vdupq_n_f32(0.0f); | 466 float32x4_t vec_wfEn = vdupq_n_f32(0.0f); |
| 465 // vectorized code (four at once) | 467 // vectorized code (four at once) |
| 466 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 468 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
| 467 const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]); | 469 const float32x4_t vec_wfBuf0 = vld1q_f32(&h_fft_buf[0][pos + j]); |
| 468 const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]); | 470 const float32x4_t vec_wfBuf1 = vld1q_f32(&h_fft_buf[1][pos + j]); |
| 469 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); | 471 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); |
| 470 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); | 472 vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); |
| 471 } | 473 } |
| 472 { | 474 { |
| 473 float32x2_t vec_total; | 475 float32x2_t vec_total; |
| 474 // A B C D | 476 // A B C D |
| 475 vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); | 477 vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); |
| 476 // A+B C+D | 478 // A+B C+D |
| 477 vec_total = vpadd_f32(vec_total, vec_total); | 479 vec_total = vpadd_f32(vec_total, vec_total); |
| 478 // A+B+C+D A+B+C+D | 480 // A+B+C+D A+B+C+D |
| 479 wfEn = vget_lane_f32(vec_total, 0); | 481 wfEn = vget_lane_f32(vec_total, 0); |
| 480 } | 482 } |
| 481 | 483 |
| 482 // scalar code for the remaining items. | 484 // scalar code for the remaining items. |
| 483 for (; j < PART_LEN1; j++) { | 485 for (; j < PART_LEN1; j++) { |
| 484 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + | 486 wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + |
| 485 aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; | 487 h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; |
| 486 } | 488 } |
| 487 | 489 |
| 488 if (wfEn > wfEnMax) { | 490 if (wfEn > wfEnMax) { |
| 489 wfEnMax = wfEn; | 491 wfEnMax = wfEn; |
| 490 delay = i; | 492 delay = i; |
| 491 } | 493 } |
| 492 } | 494 } |
| 493 return delay; | 495 return delay; |
| 494 } | 496 } |
| 495 | 497 |
| (...skipping 235 matching lines...) |
| 731 WebRtcAec_FilterFar = FilterFarNEON; | 733 WebRtcAec_FilterFar = FilterFarNEON; |
| 732 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 734 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
| 733 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 735 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
| 734 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 736 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
| 735 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 737 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
| 736 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; | 738 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; |
| 737 WebRtcAec_PartitionDelay = PartitionDelayNEON; | 739 WebRtcAec_PartitionDelay = PartitionDelayNEON; |
| 738 WebRtcAec_WindowData = WindowDataNEON; | 740 WebRtcAec_WindowData = WindowDataNEON; |
| 739 } | 741 } |
| 740 } // namespace webrtc | 742 } // namespace webrtc |
| OLD | NEW |