webrtc/modules/audio_processing/aec/aec_core_sse2.cc - Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC.

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_sse2.cc

Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC. (Closed) | Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Rebase with latest master | Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_neon.cc ('k') | no next file » | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);	368 const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);

369 const __m128 exp2_y =	369 const __m128 exp2_y =

370 _mm_add_ps(exp2_y_2, (reinterpret_cast<const __m128>(C0)));	370 _mm_add_ps(exp2_y_2, (reinterpret_cast<const __m128>(C0)));

371	371

372 // Combine parts.	372 // Combine parts.

373 a_exp_b = _mm_mul_ps(exp2_y, two_n);	373 a_exp_b = _mm_mul_ps(exp2_y, two_n);

374 }	374 }

375 return a_exp_b;	375 return a_exp_b;

376 }	376 }

377	377

378 static void OverdriveAndSuppressSSE2(AecCore* aec,	378 static void OverdriveAndSuppressSSE2(float overdrive_scaling,

379 float hNl[PART_LEN1],	379 float hNl[PART_LEN1],

380 const float hNlFb,	380 const float hNlFb,

381 float efw[2][PART_LEN1]) {	381 float efw[2][PART_LEN1]) {

382 int i;	382 int i;

383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);	383 const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);

384 const __m128 vec_one = _mm_set1_ps(1.0f);	384 const __m128 vec_one = _mm_set1_ps(1.0f);

385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f);	385 const __m128 vec_minus_one = _mm_set1_ps(-1.0f);

386 const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);	386 const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling);

387 // vectorized code (four at once)	387 // vectorized code (four at once)

388 for (i = 0; i + 3 < PART_LEN1; i += 4) {	388 for (i = 0; i + 3 < PART_LEN1; i += 4) {

389 // Weight subbands	389 // Weight subbands

390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);	390 __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);

391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);	391 const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);

392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);	392 const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);

393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);	393 const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);

394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);	394 const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);

395 const __m128 vec_one_weightCurve_hNl =	395 const __m128 vec_one_weightCurve_hNl =

396 _mm_mul_ps(vec_one_weightCurve, vec_hNl);	396 _mm_mul_ps(vec_one_weightCurve, vec_hNl);

397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);	397 const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);

398 const __m128 vec_if1 = _mm_and_ps(	398 const __m128 vec_if1 = _mm_and_ps(

399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));	399 bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));

400 vec_hNl = _mm_or_ps(vec_if0, vec_if1);	400 vec_hNl = _mm_or_ps(vec_if0, vec_if1);

401	401

402 {	402 {

403 const __m128 vec_overDriveCurve =	403 const __m128 vec_overDriveCurve =

404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);	404 _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);

405 const __m128 vec_overDriveSm_overDriveCurve =	405 const __m128 vec_overDriveSm_overDriveCurve =

406 _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);	406 _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);

407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);	407 vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);

408 _mm_storeu_ps(&hNl[i], vec_hNl);	408 _mm_storeu_ps(&hNl[i], vec_hNl);

409 }	409 }

410	410

411 // Suppress error signal	411 // Suppress error signal

412 {	412 {

413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);	413 __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);

414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);	414 __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);

415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);	415 vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);

416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);	416 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);

417	417

418 // Ooura fft returns incorrect sign on imaginary component. It matters	418 // Ooura fft returns incorrect sign on imaginary component. It matters

419 // here because we are making an additive change with comfort noise.	419 // here because we are making an additive change with comfort noise.

420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);	420 vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);

421 _mm_storeu_ps(&efw[0][i], vec_efw_re);	421 _mm_storeu_ps(&efw[0][i], vec_efw_re);

422 _mm_storeu_ps(&efw[1][i], vec_efw_im);	422 _mm_storeu_ps(&efw[1][i], vec_efw_im);

423 }	423 }

424 }	424 }

425 // scalar code for the remaining items.	425 // scalar code for the remaining items.

426 for (; i < PART_LEN1; i++) {	426 for (; i < PART_LEN1; i++) {

427 // Weight subbands	427 // Weight subbands

428 if (hNl[i] > hNlFb) {	428 if (hNl[i] > hNlFb) {

429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +	429 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +

430 (1 - WebRtcAec_weightCurve[i]) * hNl[i];	430 (1 - WebRtcAec_weightCurve[i]) * hNl[i];

431 }	431 }

432 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);	432 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);

433	433

434 // Suppress error signal	434 // Suppress error signal

435 efw[0][i] *= hNl[i];	435 efw[0][i] *= hNl[i];

436 efw[1][i] *= hNl[i];	436 efw[1][i] *= hNl[i];

437	437

438 // Ooura fft returns incorrect sign on imaginary component. It matters	438 // Ooura fft returns incorrect sign on imaginary component. It matters

439 // here because we are making an additive change with comfort noise.	439 // here because we are making an additive change with comfort noise.

440 efw[1][i] *= -1;	440 efw[1][i] *= -1;

441 }	441 }

442 }	442 }

(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
728 WebRtcAec_FilterFar = FilterFarSSE2;	728 WebRtcAec_FilterFar = FilterFarSSE2;

729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;	729 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;

730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;	730 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;

731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;	731 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;

732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;	732 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;

733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;	733 WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;

734 WebRtcAec_PartitionDelay = PartitionDelaySSE2;	734 WebRtcAec_PartitionDelay = PartitionDelaySSE2;

735 WebRtcAec_WindowData = WindowDataSSE2;	735 WebRtcAec_WindowData = WindowDataSSE2;

736 }	736 }

737 } // namespace webrtc	737 } // namespace webrtc

OLD	NEW