OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); | 368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); |
369 const __m128 exp2_y = | 369 const __m128 exp2_y = |
370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); | 370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); |
371 | 371 |
372 // Combine parts. | 372 // Combine parts. |
373 a_exp_b = _mm_mul_ps(exp2_y, two_n); | 373 a_exp_b = _mm_mul_ps(exp2_y, two_n); |
374 } | 374 } |
375 return a_exp_b; | 375 return a_exp_b; |
376 } | 376 } |
377 | 377 |
378 static void OverdriveAndSuppressSSE2(AecCore* aec, | 378 static void OverdriveAndSuppressSSE2(float overdrive_scaling, |
379 float hNl[PART_LEN1], | 379 float hNl[PART_LEN1], |
380 const float hNlFb, | 380 const float hNlFb, |
381 float efw[2][PART_LEN1]) { | 381 float efw[2][PART_LEN1]) { |
382 int i; | 382 int i; |
383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); | 383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); |
384 const __m128 vec_one = _mm_set1_ps(1.0f); | 384 const __m128 vec_one = _mm_set1_ps(1.0f); |
385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f); | 385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f); |
386 const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); | 386 const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling); |
387 // vectorized code (four at once) | 387 // vectorized code (four at once) |
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 388 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
389 // Weight subbands | 389 // Weight subbands |
390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); | 390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); |
391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); | 391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); |
392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); | 392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); |
393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); | 393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); |
394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); | 394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); |
395 const __m128 vec_one_weightCurve_hNl = | 395 const __m128 vec_one_weightCurve_hNl = |
396 _mm_mul_ps(vec_one_weightCurve, vec_hNl); | 396 _mm_mul_ps(vec_one_weightCurve, vec_hNl); |
397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); | 397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); |
398 const __m128 vec_if1 = _mm_and_ps( | 398 const __m128 vec_if1 = _mm_and_ps( |
399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); | 399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); |
400 vec_hNl = _mm_or_ps(vec_if0, vec_if1); | 400 vec_hNl = _mm_or_ps(vec_if0, vec_if1); |
401 | 401 |
402 { | 402 { |
403 const __m128 vec_overDriveCurve = | 403 const __m128 vec_overDriveCurve = |
404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); | 404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); |
405 const __m128 vec_overDriveSm_overDriveCurve = | 405 const __m128 vec_overDriveSm_overDriveCurve = |
406 _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); | 406 _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); |
407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); | 407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); |
408 _mm_storeu_ps(&hNl[i], vec_hNl); | 408 _mm_storeu_ps(&hNl[i], vec_hNl); |
409 } | 409 } |
410 | 410 |
411 // Suppress error signal | 411 // Suppress error signal |
412 { | 412 { |
413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); | 413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); |
414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); | 414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); |
415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); | 415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); |
416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); | 416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); |
417 | 417 |
418 // Ooura fft returns incorrect sign on imaginary component. It matters | 418 // Ooura fft returns incorrect sign on imaginary component. It matters |
419 // here because we are making an additive change with comfort noise. | 419 // here because we are making an additive change with comfort noise. |
420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); | 420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); |
421 _mm_storeu_ps(&efw[0][i], vec_efw_re); | 421 _mm_storeu_ps(&efw[0][i], vec_efw_re); |
422 _mm_storeu_ps(&efw[1][i], vec_efw_im); | 422 _mm_storeu_ps(&efw[1][i], vec_efw_im); |
423 } | 423 } |
424 } | 424 } |
425 // scalar code for the remaining items. | 425 // scalar code for the remaining items. |
426 for (; i < PART_LEN1; i++) { | 426 for (; i < PART_LEN1; i++) { |
427 // Weight subbands | 427 // Weight subbands |
428 if (hNl[i] > hNlFb) { | 428 if (hNl[i] > hNlFb) { |
429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + |
430 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 430 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; |
431 } | 431 } |
432 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | 432 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); |
433 | 433 |
434 // Suppress error signal | 434 // Suppress error signal |
435 efw[0][i] *= hNl[i]; | 435 efw[0][i] *= hNl[i]; |
436 efw[1][i] *= hNl[i]; | 436 efw[1][i] *= hNl[i]; |
437 | 437 |
438 // Ooura fft returns incorrect sign on imaginary component. It matters | 438 // Ooura fft returns incorrect sign on imaginary component. It matters |
439 // here because we are making an additive change with comfort noise. | 439 // here because we are making an additive change with comfort noise. |
440 efw[1][i] *= -1; | 440 efw[1][i] *= -1; |
441 } | 441 } |
442 } | 442 } |
(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
728 WebRtcAec_FilterFar = FilterFarSSE2; | 728 WebRtcAec_FilterFar = FilterFarSSE2; |
729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |
734 WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 734 WebRtcAec_PartitionDelay = PartitionDelaySSE2; |
735 WebRtcAec_WindowData = WindowDataSSE2; | 735 WebRtcAec_WindowData = WindowDataSSE2; |
736 } | 736 } |
737 } // namespace webrtc | 737 } // namespace webrtc |
OLD | NEW |