Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(77)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.cc

Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Rebase with latest master (created 4 years, 7 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after
368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); 368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
369 const __m128 exp2_y = 369 const __m128 exp2_y =
370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); 370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0)));
371 371
372 // Combine parts. 372 // Combine parts.
373 a_exp_b = _mm_mul_ps(exp2_y, two_n); 373 a_exp_b = _mm_mul_ps(exp2_y, two_n);
374 } 374 }
375 return a_exp_b; 375 return a_exp_b;
376 } 376 }
377 377
378 static void OverdriveAndSuppressSSE2(AecCore* aec, 378 static void OverdriveAndSuppressSSE2(float overdrive_scaling,
379 float hNl[PART_LEN1], 379 float hNl[PART_LEN1],
380 const float hNlFb, 380 const float hNlFb,
381 float efw[2][PART_LEN1]) { 381 float efw[2][PART_LEN1]) {
382 int i; 382 int i;
383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); 383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
384 const __m128 vec_one = _mm_set1_ps(1.0f); 384 const __m128 vec_one = _mm_set1_ps(1.0f);
385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f); 385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
386 const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); 386 const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling);
387 // vectorized code (four at once) 387 // vectorized code (four at once)
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { 388 for (i = 0; i + 3 < PART_LEN1; i += 4) {
389 // Weight subbands 389 // Weight subbands
390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); 390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); 391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); 392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); 393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); 394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
395 const __m128 vec_one_weightCurve_hNl = 395 const __m128 vec_one_weightCurve_hNl =
396 _mm_mul_ps(vec_one_weightCurve, vec_hNl); 396 _mm_mul_ps(vec_one_weightCurve, vec_hNl);
397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); 397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
398 const __m128 vec_if1 = _mm_and_ps( 398 const __m128 vec_if1 = _mm_and_ps(
399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); 399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
400 vec_hNl = _mm_or_ps(vec_if0, vec_if1); 400 vec_hNl = _mm_or_ps(vec_if0, vec_if1);
401 401
402 { 402 {
403 const __m128 vec_overDriveCurve = 403 const __m128 vec_overDriveCurve =
404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); 404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
405 const __m128 vec_overDriveSm_overDriveCurve = 405 const __m128 vec_overDriveSm_overDriveCurve =
406 _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); 406 _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);
407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); 407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
408 _mm_storeu_ps(&hNl[i], vec_hNl); 408 _mm_storeu_ps(&hNl[i], vec_hNl);
409 } 409 }
410 410
411 // Suppress error signal 411 // Suppress error signal
412 { 412 {
413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); 413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); 414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); 415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); 416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
417 417
418 // Ooura fft returns incorrect sign on imaginary component. It matters 418 // Ooura fft returns incorrect sign on imaginary component. It matters
419 // here because we are making an additive change with comfort noise. 419 // here because we are making an additive change with comfort noise.
420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); 420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
421 _mm_storeu_ps(&efw[0][i], vec_efw_re); 421 _mm_storeu_ps(&efw[0][i], vec_efw_re);
422 _mm_storeu_ps(&efw[1][i], vec_efw_im); 422 _mm_storeu_ps(&efw[1][i], vec_efw_im);
423 } 423 }
424 } 424 }
425 // scalar code for the remaining items. 425 // scalar code for the remaining items.
426 for (; i < PART_LEN1; i++) { 426 for (; i < PART_LEN1; i++) {
427 // Weight subbands 427 // Weight subbands
428 if (hNl[i] > hNlFb) { 428 if (hNl[i] > hNlFb) {
429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
430 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 430 (1 - WebRtcAec_weightCurve[i]) * hNl[i];
431 } 431 }
432 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 432 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
433 433
434 // Suppress error signal 434 // Suppress error signal
435 efw[0][i] *= hNl[i]; 435 efw[0][i] *= hNl[i];
436 efw[1][i] *= hNl[i]; 436 efw[1][i] *= hNl[i];
437 437
438 // Ooura fft returns incorrect sign on imaginary component. It matters 438 // Ooura fft returns incorrect sign on imaginary component. It matters
439 // here because we are making an additive change with comfort noise. 439 // here because we are making an additive change with comfort noise.
440 efw[1][i] *= -1; 440 efw[1][i] *= -1;
441 } 441 }
442 } 442 }
(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after
728 WebRtcAec_FilterFar = FilterFarSSE2; 728 WebRtcAec_FilterFar = FilterFarSSE2;
729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; 729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; 730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; 731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; 732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; 733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
734 WebRtcAec_PartitionDelay = PartitionDelaySSE2; 734 WebRtcAec_PartitionDelay = PartitionDelaySSE2;
735 WebRtcAec_WindowData = WindowDataSSE2; 735 WebRtcAec_WindowData = WindowDataSSE2;
736 } 736 }
737 } // namespace webrtc 737 } // namespace webrtc
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698