| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (... 357 matching lines skipped ...) |
| 368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); | 368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); |
| 369 const __m128 exp2_y = | 369 const __m128 exp2_y = |
| 370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); | 370 _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); |
| 371 | 371 |
| 372 // Combine parts. | 372 // Combine parts. |
| 373 a_exp_b = _mm_mul_ps(exp2_y, two_n); | 373 a_exp_b = _mm_mul_ps(exp2_y, two_n); |
| 374 } | 374 } |
| 375 return a_exp_b; | 375 return a_exp_b; |
| 376 } | 376 } |
| 377 | 377 |
// OverdriveAndSuppressSSE2 -- side-by-side diff, OLD revision on the left,
// NEW revision on the right.
//
// The two revisions differ only in how the overdrive factor is supplied:
// OLD receives an AecCore* and reads aec->overDriveSm, NEW receives the
// factor directly as the float parameter overdrive_scaling.
//
// For every index i in [0, PART_LEN1) the function, in place:
//   1. if hNl[i] > hNlFb, replaces hNl[i] with
//      WebRtcAec_weightCurve[i] * hNlFb + (1 - WebRtcAec_weightCurve[i]) * hNl[i];
//   2. raises hNl[i] to the power (overdrive factor * WebRtcAec_overDriveCurve[i]);
//   3. multiplies efw[0][i] and efw[1][i] by the updated hNl[i] and then
//      negates efw[1][i].
// Nothing is returned; hNl and efw are the outputs.
| 378 static void OverdriveAndSuppressSSE2(AecCore* aec, | 378 static void OverdriveAndSuppressSSE2(float overdrive_scaling, |
| 379 float hNl[PART_LEN1], | 379 float hNl[PART_LEN1], |
| 380 const float hNlFb, | 380 const float hNlFb, |
| 381 float efw[2][PART_LEN1]) { | 381 float efw[2][PART_LEN1]) { |
| 382 int i; | 382 int i; |
// Loop-invariant scalars broadcast to all four lanes once, before the loop.
| 383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); | 383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); |
| 384 const __m128 vec_one = _mm_set1_ps(1.0f); | 384 const __m128 vec_one = _mm_set1_ps(1.0f); |
| 385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f); | 385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f); |
| 386 const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); | 386 const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling); |
| 387 // vectorized code (four at once) | 387 // vectorized code (four at once) |
// The condition i + 3 < PART_LEN1 stops before a partial group of four; all
// loads and stores in the loop are the unaligned variants.
| 388 for (i = 0; i + 3 < PART_LEN1; i += 4) { | 388 for (i = 0; i + 3 < PART_LEN1; i += 4) { |
| 389 // Weight subbands | 389 // Weight subbands |
| 390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); | 390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); |
| 391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); | 391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); |
// 'bigger' is a per-lane mask that is set where hNl > hNlFb.
| 392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); | 392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); |
| 393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); | 393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); |
| 394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); | 394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); |
| 395 const __m128 vec_one_weightCurve_hNl = | 395 const __m128 vec_one_weightCurve_hNl = |
| 396 _mm_mul_ps(vec_one_weightCurve, vec_hNl); | 396 _mm_mul_ps(vec_one_weightCurve, vec_hNl); |
// Branch-free equivalent of the scalar `if (hNl[i] > hNlFb)` below: lanes with
// the mask clear keep hNl (vec_if0), lanes with the mask set take the weighted
// blend (vec_if1); OR-ing the two merges them into one vector.
| 397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); | 397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); |
| 398 const __m128 vec_if1 = _mm_and_ps( | 398 const __m128 vec_if1 = _mm_and_ps( |
| 399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); | 399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); |
| 400 vec_hNl = _mm_or_ps(vec_if0, vec_if1); | 400 vec_hNl = _mm_or_ps(vec_if0, vec_if1); |
| 401 | 401 |
| 402 { | 402 { |
// Per-lane exponent = overdrive factor * WebRtcAec_overDriveCurve[i].
// mm_pow_ps is defined outside this view; it is called here at the point where
// the scalar tail below calls powf.
| 403 const __m128 vec_overDriveCurve = | 403 const __m128 vec_overDriveCurve = |
| 404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); | 404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); |
| 405 const __m128 vec_overDriveSm_overDriveCurve = | 405 const __m128 vec_overDriveSm_overDriveCurve = |
| 406 _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); | 406 _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); |
| 407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); | 407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); |
| 408 _mm_storeu_ps(&hNl[i], vec_hNl); | 408 _mm_storeu_ps(&hNl[i], vec_hNl); |
| 409 } | 409 } |
| 410 | 410 |
| 411 // Suppress error signal | 411 // Suppress error signal |
| 412 { | 412 { |
// efw[0] and efw[1] are both scaled by the freshly computed hNl; the existing
// comment below calls efw[1] the imaginary component, and only that row is
// negated.
| 413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); | 413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); |
| 414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); | 414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); |
| 415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); | 415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); |
| 416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); | 416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); |
| 417 | 417 |
| 418 // Ooura fft returns incorrect sign on imaginary component. It matters | 418 // Ooura fft returns incorrect sign on imaginary component. It matters |
| 419 // here because we are making an additive change with comfort noise. | 419 // here because we are making an additive change with comfort noise. |
| 420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); | 420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); |
| 421 _mm_storeu_ps(&efw[0][i], vec_efw_re); | 421 _mm_storeu_ps(&efw[0][i], vec_efw_re); |
| 422 _mm_storeu_ps(&efw[1][i], vec_efw_im); | 422 _mm_storeu_ps(&efw[1][i], vec_efw_im); |
| 423 } | 423 } |
| 424 } | 424 } |
| 425 // scalar code for the remaining items. | 425 // scalar code for the remaining items. |
// Continues from the i left by the vector loop, so it covers the last
// PART_LEN1 % 4 indices with the same three steps written in scalar form.
| 426 for (; i < PART_LEN1; i++) { | 426 for (; i < PART_LEN1; i++) { |
| 427 // Weight subbands | 427 // Weight subbands |
| 428 if (hNl[i] > hNlFb) { | 428 if (hNl[i] > hNlFb) { |
| 429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + | 429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + |
| 430 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; | 430 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; |
| 431 } | 431 } |
| 432 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); | 432 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); |
| 433 | 433 |
| 434 // Suppress error signal | 434 // Suppress error signal |
| 435 efw[0][i] *= hNl[i]; | 435 efw[0][i] *= hNl[i]; |
| 436 efw[1][i] *= hNl[i]; | 436 efw[1][i] *= hNl[i]; |
| 437 | 437 |
| 438 // Ooura fft returns incorrect sign on imaginary component. It matters | 438 // Ooura fft returns incorrect sign on imaginary component. It matters |
| 439 // here because we are making an additive change with comfort noise. | 439 // here because we are making an additive change with comfort noise. |
| 440 efw[1][i] *= -1; | 440 efw[1][i] *= -1; |
| 441 } | 441 } |
| 442 } | 442 } |
| (... 285 matching lines skipped ...) |
| 728 WebRtcAec_FilterFar = FilterFarSSE2; | 728 WebRtcAec_FilterFar = FilterFarSSE2; |
| 729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
| 730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
| 731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
| 732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
| 733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; | 733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; |
| 734 WebRtcAec_PartitionDelay = PartitionDelaySSE2; | 734 WebRtcAec_PartitionDelay = PartitionDelaySSE2; |
| 735 WebRtcAec_WindowData = WindowDataSSE2; | 735 WebRtcAec_WindowData = WindowDataSSE2; |
| 736 } | 736 } |
| 737 } // namespace webrtc | 737 } // namespace webrtc |
| OLD | NEW |