Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 1943753002: Separated the functionalities in the OverdriveAndSuppress method in the AEC into two methods (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@RefactorAec5_CL
Patch Set: Added const specifier to local pointer Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after
367 exp2_y = vmlaq_f32(C1, y, exp2_y); 367 exp2_y = vmlaq_f32(C1, y, exp2_y);
368 exp2_y = vmlaq_f32(C0, y, exp2_y); 368 exp2_y = vmlaq_f32(C0, y, exp2_y);
369 369
370 // Combine parts. 370 // Combine parts.
371 a_exp_b = vmulq_f32(exp2_y, two_n); 371 a_exp_b = vmulq_f32(exp2_y, two_n);
372 } 372 }
373 373
374 return a_exp_b; 374 return a_exp_b;
375 } 375 }
376 376
377 static void OverdriveAndSuppressNEON(float overdrive_scaling, 377 static void OverdriveNEON(float overdrive_scaling,
378 float hNl[PART_LEN1], 378 float hNlFb,
379 const float hNlFb, 379 float hNl[PART_LEN1]) {
380 float efw[2][PART_LEN1]) {
381 int i; 380 int i;
382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); 381 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
383 const float32x4_t vec_one = vdupq_n_f32(1.0f); 382 const float32x4_t vec_one = vdupq_n_f32(1.0f);
384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); 383 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);
386 384
387 // vectorized code (four at once) 385 // vectorized code (four at once)
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { 386 for (i = 0; i + 3 < PART_LEN1; i += 4) {
389 // Weight subbands 387 // Weight subbands
390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); 388 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); 389 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); 390 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
393 const float32x4_t vec_weightCurve_hNlFb = 391 const float32x4_t vec_weightCurve_hNlFb =
394 vmulq_f32(vec_weightCurve, vec_hNlFb); 392 vmulq_f32(vec_weightCurve, vec_hNlFb);
395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); 393 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
396 const float32x4_t vec_one_weightCurve_hNl = 394 const float32x4_t vec_one_weightCurve_hNl =
397 vmulq_f32(vec_one_weightCurve, vec_hNl); 395 vmulq_f32(vec_one_weightCurve, vec_hNl);
398 const uint32x4_t vec_if0 = 396 const uint32x4_t vec_if0 =
399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); 397 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));
400 const float32x4_t vec_one_weightCurve_add = 398 const float32x4_t vec_one_weightCurve_add =
401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); 399 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
402 const uint32x4_t vec_if1 = 400 const uint32x4_t vec_if1 =
403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); 401 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
404 402
405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); 403 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
406 404
407 { 405 const float32x4_t vec_overDriveCurve =
408 const float32x4_t vec_overDriveCurve = 406 vld1q_f32(&WebRtcAec_overDriveCurve[i]);
409 vld1q_f32(&WebRtcAec_overDriveCurve[i]); 407 const float32x4_t vec_overDriveSm_overDriveCurve =
410 const float32x4_t vec_overDriveSm_overDriveCurve = 408 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); 409 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); 410 vst1q_f32(&hNl[i], vec_hNl);
413 vst1q_f32(&hNl[i], vec_hNl);
414 }
415
416 // Suppress error signal
417 {
418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
422
423 // Ooura fft returns incorrect sign on imaginary component. It matters
424 // here because we are making an additive change with comfort noise.
425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
426 vst1q_f32(&efw[0][i], vec_efw_re);
427 vst1q_f32(&efw[1][i], vec_efw_im);
428 }
429 } 411 }
430 412
431 // scalar code for the remaining items. 413 // scalar code for the remaining items.
432 for (; i < PART_LEN1; i++) { 414 for (; i < PART_LEN1; i++) {
433 // Weight subbands 415 // Weight subbands
434 if (hNl[i] > hNlFb) { 416 if (hNl[i] > hNlFb) {
435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 417 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
436 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 418 (1 - WebRtcAec_weightCurve[i]) * hNl[i];
437 } 419 }
438 420
439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); 421 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
422 }
423 }
440 424
441 // Suppress error signal 425 static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
426 int i;
427 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
428 // vectorized code (four at once)
429 for (i = 0; i + 3 < PART_LEN1; i += 4) {
430 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
431 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
432 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
433 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
434 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
435
436 // Ooura fft returns incorrect sign on imaginary component. It matters
437 // here because we are making an additive change with comfort noise.
438 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
439 vst1q_f32(&efw[0][i], vec_efw_re);
440 vst1q_f32(&efw[1][i], vec_efw_im);
441 }
442
443 // scalar code for the remaining items.
444 for (; i < PART_LEN1; i++) {
442 efw[0][i] *= hNl[i]; 445 efw[0][i] *= hNl[i];
443 efw[1][i] *= hNl[i]; 446 efw[1][i] *= hNl[i];
444 447
445 // Ooura fft returns incorrect sign on imaginary component. It matters 448 // Ooura fft returns incorrect sign on imaginary component. It matters
446 // here because we are making an additive change with comfort noise. 449 // here because we are making an additive change with comfort noise.
447 efw[1][i] *= -1; 450 efw[1][i] *= -1;
448 } 451 }
449 } 452 }
450 453
451 static int PartitionDelayNEON( 454 static int PartitionDelayNEON(
(...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after
715 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + 718 cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
716 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / 719 coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
717 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); 720 (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
718 } 721 }
719 } 722 }
720 723
721 void WebRtcAec_InitAec_neon(void) { 724 void WebRtcAec_InitAec_neon(void) {
722 WebRtcAec_FilterFar = FilterFarNEON; 725 WebRtcAec_FilterFar = FilterFarNEON;
723 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 726 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
724 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 727 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
725 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 728 WebRtcAec_Overdrive = OverdriveNEON;
729 WebRtcAec_Suppress = SuppressNEON;
726 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; 730 WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;
727 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; 731 WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;
728 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; 732 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
729 WebRtcAec_PartitionDelay = PartitionDelayNEON; 733 WebRtcAec_PartitionDelay = PartitionDelayNEON;
730 WebRtcAec_WindowData = WindowDataNEON; 734 WebRtcAec_WindowData = WindowDataNEON;
731 } 735 }
732 } // namespace webrtc 736 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.cc ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698