Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.cc

Issue 1936203002: Made the method PartitionDelay independent of the AEC state. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec3_CL
Patch Set: Rebase Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 431 matching lines...) Expand 10 before | Expand all | Expand 10 after
442 } 442 }
443 443
444 __inline static void _mm_add_ps_4x1(__m128 sum, float* dst) { 444 __inline static void _mm_add_ps_4x1(__m128 sum, float* dst) {
445 // A+B C+D 445 // A+B C+D
446 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2))); 446 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
447 // A+B+C+D A+B+C+D 447 // A+B+C+D A+B+C+D
448 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); 448 sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
449 _mm_store_ss(dst, sum); 449 _mm_store_ss(dst, sum);
450 } 450 }
451 451
452 static int PartitionDelaySSE2(const AecCore* aec) { 452 static int PartitionDelaySSE2(
453 int num_partitions,
454 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
453 // Measures the energy in each filter partition and returns the partition with 455 // Measures the energy in each filter partition and returns the partition with
454 // highest energy. 456 // highest energy.
455 // TODO(bjornv): Spread computational cost by computing one partition per 457 // TODO(bjornv): Spread computational cost by computing one partition per
456 // block? 458 // block?
457 float wfEnMax = 0; 459 float wfEnMax = 0;
458 int i; 460 int i;
459 int delay = 0; 461 int delay = 0;
460 462
461 for (i = 0; i < aec->num_partitions; i++) { 463 for (i = 0; i < num_partitions; i++) {
462 int j; 464 int j;
463 int pos = i * PART_LEN1; 465 int pos = i * PART_LEN1;
464 float wfEn = 0; 466 float wfEn = 0;
465 __m128 vec_wfEn = _mm_set1_ps(0.0f); 467 __m128 vec_wfEn = _mm_set1_ps(0.0f);
466 // vectorized code (four at once) 468 // vectorized code (four at once)
467 for (j = 0; j + 3 < PART_LEN1; j += 4) { 469 for (j = 0; j + 3 < PART_LEN1; j += 4) {
468 const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); 470 const __m128 vec_wfBuf0 = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
469 const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); 471 const __m128 vec_wfBuf1 = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
470 vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0)); 472 vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
471 vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1)); 473 vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
472 } 474 }
473 _mm_add_ps_4x1(vec_wfEn, &wfEn); 475 _mm_add_ps_4x1(vec_wfEn, &wfEn);
474 476
475 // scalar code for the remaining items. 477 // scalar code for the remaining items.
476 for (; j < PART_LEN1; j++) { 478 for (; j < PART_LEN1; j++) {
477 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + 479 wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] +
478 aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; 480 h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j];
479 } 481 }
480 482
481 if (wfEn > wfEnMax) { 483 if (wfEn > wfEnMax) {
482 wfEnMax = wfEn; 484 wfEnMax = wfEn;
483 delay = i; 485 delay = i;
484 } 486 }
485 } 487 }
486 return delay; 488 return delay;
487 } 489 }
488 490
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
744 WebRtcAec_FilterFar = FilterFarSSE2; 746 WebRtcAec_FilterFar = FilterFarSSE2;
745 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 747 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
746 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 748 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
747 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 749 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
748 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 750 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
749 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; 751 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
750 WebRtcAec_PartitionDelay = PartitionDelaySSE2; 752 WebRtcAec_PartitionDelay = PartitionDelaySSE2;
751 WebRtcAec_WindowData = WindowDataSSE2; 753 WebRtcAec_WindowData = WindowDataSSE2;
752 } 754 }
753 } // namespace webrtc 755 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698