webrtc/modules/audio_processing/aec/aec_core_neon.cc - Issue 1943753002: Separated the functionalities in the OverdriveAndSuppress method in the AEC into two methods

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 1943753002: Separated the functionalities in the OverdriveAndSuppress method in the AEC into two methods (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec5_CL

Patch Set: Added const specifier to local pointer Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 356 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
367 exp2_y = vmlaq_f32(C1, y, exp2_y);	367 exp2_y = vmlaq_f32(C1, y, exp2_y);

368 exp2_y = vmlaq_f32(C0, y, exp2_y);	368 exp2_y = vmlaq_f32(C0, y, exp2_y);

369	369

370 // Combine parts.	370 // Combine parts.

371 a_exp_b = vmulq_f32(exp2_y, two_n);	371 a_exp_b = vmulq_f32(exp2_y, two_n);

372 }	372 }

373	373

374 return a_exp_b;	374 return a_exp_b;

375 }	375 }

376	376

377 static void OverdriveAndSuppressNEON(float overdrive_scaling,	377 static void OverdriveNEON(float overdrive_scaling,

378 float hNl[PART_LEN1],	378 float hNlFb,

379 const float hNlFb,	379 float hNl[PART_LEN1]) {

380 float efw[2][PART_LEN1]) {

381 int i;	380 int i;

382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);	381 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);

383 const float32x4_t vec_one = vdupq_n_f32(1.0f);	382 const float32x4_t vec_one = vdupq_n_f32(1.0f);

384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);

385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);	383 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);

386	384

387 // vectorized code (four at once)	385 // vectorized code (four at once)

388 for (i = 0; i + 3 < PART_LEN1; i += 4) {	386 for (i = 0; i + 3 < PART_LEN1; i += 4) {

389 // Weight subbands	387 // Weight subbands

390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);	388 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);

391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);	389 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);

392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);	390 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);

393 const float32x4_t vec_weightCurve_hNlFb =	391 const float32x4_t vec_weightCurve_hNlFb =

394 vmulq_f32(vec_weightCurve, vec_hNlFb);	392 vmulq_f32(vec_weightCurve, vec_hNlFb);

395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);	393 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);

396 const float32x4_t vec_one_weightCurve_hNl =	394 const float32x4_t vec_one_weightCurve_hNl =

397 vmulq_f32(vec_one_weightCurve, vec_hNl);	395 vmulq_f32(vec_one_weightCurve, vec_hNl);

398 const uint32x4_t vec_if0 =	396 const uint32x4_t vec_if0 =

399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));	397 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));

400 const float32x4_t vec_one_weightCurve_add =	398 const float32x4_t vec_one_weightCurve_add =

401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);	399 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);

402 const uint32x4_t vec_if1 =	400 const uint32x4_t vec_if1 =

403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));	401 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));

404	402

405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));	403 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));

406	404

407 {	405 const float32x4_t vec_overDriveCurve =

408 const float32x4_t vec_overDriveCurve =	406 vld1q_f32(&WebRtcAec_overDriveCurve[i]);

409 vld1q_f32(&WebRtcAec_overDriveCurve[i]);	407 const float32x4_t vec_overDriveSm_overDriveCurve =

410 const float32x4_t vec_overDriveSm_overDriveCurve =	408 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);

411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);	409 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);

412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);	410 vst1q_f32(&hNl[i], vec_hNl);

413 vst1q_f32(&hNl[i], vec_hNl);

414 }

415

416 // Suppress error signal

417 {

418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);

419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);

420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);

421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);

422

423 // Ooura fft returns incorrect sign on imaginary component. It matters

424 // here because we are making an additive change with comfort noise.

425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);

426 vst1q_f32(&efw[0][i], vec_efw_re);

427 vst1q_f32(&efw[1][i], vec_efw_im);

428 }

429 }	411 }

430	412

431 // scalar code for the remaining items.	413 // scalar code for the remaining items.

432 for (; i < PART_LEN1; i++) {	414 for (; i < PART_LEN1; i++) {

433 // Weight subbands	415 // Weight subbands

434 if (hNl[i] > hNlFb) {	416 if (hNl[i] > hNlFb) {

435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +	417 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +

436 (1 - WebRtcAec_weightCurve[i]) * hNl[i];	418 (1 - WebRtcAec_weightCurve[i]) * hNl[i];

437 }	419 }

438	420

439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);	421 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);

	422 }

	423 }

440	424

441 // Suppress error signal	425 static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {

	426 int i;

	427 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);

	428 // vectorized code (four at once)

	429 for (i = 0; i + 3 < PART_LEN1; i += 4) {

	430 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);

	431 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);

	432 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);

	433 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);

	434 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);

	435

	436 // Ooura fft returns incorrect sign on imaginary component. It matters

	437 // here because we are making an additive change with comfort noise.

	438 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);

	439 vst1q_f32(&efw[0][i], vec_efw_re);

	440 vst1q_f32(&efw[1][i], vec_efw_im);

	441 }

	442

	443 // scalar code for the remaining items.

	444 for (; i < PART_LEN1; i++) {

442 efw[0][i] *= hNl[i];	445 efw[0][i] *= hNl[i];

443 efw[1][i] *= hNl[i];	446 efw[1][i] *= hNl[i];

444	447

445 // Ooura fft returns incorrect sign on imaginary component. It matters	448 // Ooura fft returns incorrect sign on imaginary component. It matters

446 // here because we are making an additive change with comfort noise.	449 // here because we are making an additive change with comfort noise.

447 efw[1][i] *= -1;	450 efw[1][i] *= -1;

448 }	451 }

449 }	452 }

450	453

451 static int PartitionDelayNEON(	454 static int PartitionDelayNEON(

(...skipping 263 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
715 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +	718 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +

716 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /	719 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /

717 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);	720 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);

718 }	721 }

719 }	722 }

720	723

721 void WebRtcAec_InitAec_neon(void) {	724 void WebRtcAec_InitAec_neon(void) {

722 WebRtcAec_FilterFar = FilterFarNEON;	725 WebRtcAec_FilterFar = FilterFarNEON;

723 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;	726 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;

724 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;	727 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;

725 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;	728 WebRtcAec_Overdrive = OverdriveNEON;

	729 WebRtcAec_Suppress = SuppressNEON;

726 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;	730 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;

727 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;	731 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;

728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;	732 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;

729 WebRtcAec_PartitionDelay = PartitionDelayNEON;	733 WebRtcAec_PartitionDelay = PartitionDelayNEON;

730 WebRtcAec_WindowData = WindowDataNEON;	734 WebRtcAec_WindowData = WindowDataNEON;

731 }	735 }

732 } // namespace webrtc	736 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.cc ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.cc » ('j') | no next file with comments »