webrtc/modules/audio_processing/aec/aec_core_neon.cc - Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC.

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Rebase with latest master Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.cc ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.cc » ('j') | no next file with comments »

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
367 exp2_y = vmlaq_f32(C1, y, exp2_y);	367 exp2_y = vmlaq_f32(C1, y, exp2_y);

368 exp2_y = vmlaq_f32(C0, y, exp2_y);	368 exp2_y = vmlaq_f32(C0, y, exp2_y);

369	369

370 // Combine parts.	370 // Combine parts.

371 a_exp_b = vmulq_f32(exp2_y, two_n);	371 a_exp_b = vmulq_f32(exp2_y, two_n);

372 }	372 }

373	373

374 return a_exp_b;	374 return a_exp_b;

375 }	375 }

376	376

377 static void OverdriveAndSuppressNEON(AecCore* aec,	377 static void OverdriveAndSuppressNEON(float overdrive_scaling,

378 float hNl[PART_LEN1],	378 float hNl[PART_LEN1],

379 const float hNlFb,	379 const float hNlFb,

380 float efw[2][PART_LEN1]) {	380 float efw[2][PART_LEN1]) {

381 int i;	381 int i;

382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);	382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);

383 const float32x4_t vec_one = vdupq_n_f32(1.0f);	383 const float32x4_t vec_one = vdupq_n_f32(1.0f);

384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);	384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);

385 const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);	385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);

386	386

387 // vectorized code (four at once)	387 // vectorized code (four at once)

388 for (i = 0; i + 3 < PART_LEN1; i += 4) {	388 for (i = 0; i + 3 < PART_LEN1; i += 4) {

389 // Weight subbands	389 // Weight subbands

390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);	390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);

391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);	391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);

392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);	392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);

393 const float32x4_t vec_weightCurve_hNlFb =	393 const float32x4_t vec_weightCurve_hNlFb =

394 vmulq_f32(vec_weightCurve, vec_hNlFb);	394 vmulq_f32(vec_weightCurve, vec_hNlFb);

395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);	395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);

396 const float32x4_t vec_one_weightCurve_hNl =	396 const float32x4_t vec_one_weightCurve_hNl =

397 vmulq_f32(vec_one_weightCurve, vec_hNl);	397 vmulq_f32(vec_one_weightCurve, vec_hNl);

398 const uint32x4_t vec_if0 =	398 const uint32x4_t vec_if0 =

399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));	399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));

400 const float32x4_t vec_one_weightCurve_add =	400 const float32x4_t vec_one_weightCurve_add =

401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);	401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);

402 const uint32x4_t vec_if1 =	402 const uint32x4_t vec_if1 =

403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));	403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));

404	404

405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));	405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));

406	406

407 {	407 {

408 const float32x4_t vec_overDriveCurve =	408 const float32x4_t vec_overDriveCurve =

409 vld1q_f32(&WebRtcAec_overDriveCurve[i]);	409 vld1q_f32(&WebRtcAec_overDriveCurve[i]);

410 const float32x4_t vec_overDriveSm_overDriveCurve =	410 const float32x4_t vec_overDriveSm_overDriveCurve =

411 vmulq_f32(vec_overDriveSm, vec_overDriveCurve);	411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);

412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);	412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);

413 vst1q_f32(&hNl[i], vec_hNl);	413 vst1q_f32(&hNl[i], vec_hNl);

414 }	414 }

415	415

416 // Suppress error signal	416 // Suppress error signal

417 {	417 {

418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);	418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);

419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);	419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);

420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);	420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);

421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);	421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);

422	422

423 // Ooura fft returns incorrect sign on imaginary component. It matters	423 // Ooura fft returns incorrect sign on imaginary component. It matters

424 // here because we are making an additive change with comfort noise.	424 // here because we are making an additive change with comfort noise.

425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);	425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);

426 vst1q_f32(&efw[0][i], vec_efw_re);	426 vst1q_f32(&efw[0][i], vec_efw_re);

427 vst1q_f32(&efw[1][i], vec_efw_im);	427 vst1q_f32(&efw[1][i], vec_efw_im);

428 }	428 }

429 }	429 }

430	430

431 // scalar code for the remaining items.	431 // scalar code for the remaining items.

432 for (; i < PART_LEN1; i++) {	432 for (; i < PART_LEN1; i++) {

433 // Weight subbands	433 // Weight subbands

434 if (hNl[i] > hNlFb) {	434 if (hNl[i] > hNlFb) {

435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +	435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +

436 (1 - WebRtcAec_weightCurve[i]) * hNl[i];	436 (1 - WebRtcAec_weightCurve[i]) * hNl[i];

437 }	437 }

438	438

439 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);	439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);

440	440

441 // Suppress error signal	441 // Suppress error signal

442 efw[0][i] *= hNl[i];	442 efw[0][i] *= hNl[i];

443 efw[1][i] *= hNl[i];	443 efw[1][i] *= hNl[i];

444	444

445 // Ooura fft returns incorrect sign on imaginary component. It matters	445 // Ooura fft returns incorrect sign on imaginary component. It matters

446 // here because we are making an additive change with comfort noise.	446 // here because we are making an additive change with comfort noise.

447 efw[1][i] *= -1;	447 efw[1][i] *= -1;

448 }	448 }

449 }	449 }

(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
717 WebRtcAec_FilterFar = FilterFarNEON;	717 WebRtcAec_FilterFar = FilterFarNEON;

718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;	718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;

719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;	719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;

720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;	720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;

721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;	721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;

722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;	722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;

723 WebRtcAec_PartitionDelay = PartitionDelayNEON;	723 WebRtcAec_PartitionDelay = PartitionDelayNEON;

724 WebRtcAec_WindowData = WindowDataNEON;	724 WebRtcAec_WindowData = WindowDataNEON;

725 }	725 }

726 } // namespace webrtc	726 } // namespace webrtc

OLD	NEW