Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(223)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core_neon.cc

Issue 1939723002: Removed the state as an input to OverdriveAndSuppress in the AEC. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Rebase with latest master Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after
367 exp2_y = vmlaq_f32(C1, y, exp2_y); 367 exp2_y = vmlaq_f32(C1, y, exp2_y);
368 exp2_y = vmlaq_f32(C0, y, exp2_y); 368 exp2_y = vmlaq_f32(C0, y, exp2_y);
369 369
370 // Combine parts. 370 // Combine parts.
371 a_exp_b = vmulq_f32(exp2_y, two_n); 371 a_exp_b = vmulq_f32(exp2_y, two_n);
372 } 372 }
373 373
374 return a_exp_b; 374 return a_exp_b;
375 } 375 }
376 376
377 static void OverdriveAndSuppressNEON(AecCore* aec, 377 static void OverdriveAndSuppressNEON(float overdrive_scaling,
378 float hNl[PART_LEN1], 378 float hNl[PART_LEN1],
379 const float hNlFb, 379 const float hNlFb,
380 float efw[2][PART_LEN1]) { 380 float efw[2][PART_LEN1]) {
381 int i; 381 int i;
382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); 382 const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
383 const float32x4_t vec_one = vdupq_n_f32(1.0f); 383 const float32x4_t vec_one = vdupq_n_f32(1.0f);
384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); 384 const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
385 const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm); 385 const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);
386 386
387 // vectorized code (four at once) 387 // vectorized code (four at once)
388 for (i = 0; i + 3 < PART_LEN1; i += 4) { 388 for (i = 0; i + 3 < PART_LEN1; i += 4) {
389 // Weight subbands 389 // Weight subbands
390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]); 390 float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); 391 const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); 392 const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
393 const float32x4_t vec_weightCurve_hNlFb = 393 const float32x4_t vec_weightCurve_hNlFb =
394 vmulq_f32(vec_weightCurve, vec_hNlFb); 394 vmulq_f32(vec_weightCurve, vec_hNlFb);
395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); 395 const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
396 const float32x4_t vec_one_weightCurve_hNl = 396 const float32x4_t vec_one_weightCurve_hNl =
397 vmulq_f32(vec_one_weightCurve, vec_hNl); 397 vmulq_f32(vec_one_weightCurve, vec_hNl);
398 const uint32x4_t vec_if0 = 398 const uint32x4_t vec_if0 =
399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); 399 vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));
400 const float32x4_t vec_one_weightCurve_add = 400 const float32x4_t vec_one_weightCurve_add =
401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); 401 vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
402 const uint32x4_t vec_if1 = 402 const uint32x4_t vec_if1 =
403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); 403 vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
404 404
405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); 405 vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
406 406
407 { 407 {
408 const float32x4_t vec_overDriveCurve = 408 const float32x4_t vec_overDriveCurve =
409 vld1q_f32(&WebRtcAec_overDriveCurve[i]); 409 vld1q_f32(&WebRtcAec_overDriveCurve[i]);
410 const float32x4_t vec_overDriveSm_overDriveCurve = 410 const float32x4_t vec_overDriveSm_overDriveCurve =
411 vmulq_f32(vec_overDriveSm, vec_overDriveCurve); 411 vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); 412 vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
413 vst1q_f32(&hNl[i], vec_hNl); 413 vst1q_f32(&hNl[i], vec_hNl);
414 } 414 }
415 415
416 // Suppress error signal 416 // Suppress error signal
417 { 417 {
418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); 418 float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); 419 float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); 420 vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); 421 vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
422 422
423 // Ooura fft returns incorrect sign on imaginary component. It matters 423 // Ooura fft returns incorrect sign on imaginary component. It matters
424 // here because we are making an additive change with comfort noise. 424 // here because we are making an additive change with comfort noise.
425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); 425 vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
426 vst1q_f32(&efw[0][i], vec_efw_re); 426 vst1q_f32(&efw[0][i], vec_efw_re);
427 vst1q_f32(&efw[1][i], vec_efw_im); 427 vst1q_f32(&efw[1][i], vec_efw_im);
428 } 428 }
429 } 429 }
430 430
431 // scalar code for the remaining items. 431 // scalar code for the remaining items.
432 for (; i < PART_LEN1; i++) { 432 for (; i < PART_LEN1; i++) {
433 // Weight subbands 433 // Weight subbands
434 if (hNl[i] > hNlFb) { 434 if (hNl[i] > hNlFb) {
435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 435 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
436 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 436 (1 - WebRtcAec_weightCurve[i]) * hNl[i];
437 } 437 }
438 438
439 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 439 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
440 440
441 // Suppress error signal 441 // Suppress error signal
442 efw[0][i] *= hNl[i]; 442 efw[0][i] *= hNl[i];
443 efw[1][i] *= hNl[i]; 443 efw[1][i] *= hNl[i];
444 444
445 // Ooura fft returns incorrect sign on imaginary component. It matters 445 // Ooura fft returns incorrect sign on imaginary component. It matters
446 // here because we are making an additive change with comfort noise. 446 // here because we are making an additive change with comfort noise.
447 efw[1][i] *= -1; 447 efw[1][i] *= -1;
448 } 448 }
449 } 449 }
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after
717 WebRtcAec_FilterFar = FilterFarNEON; 717 WebRtcAec_FilterFar = FilterFarNEON;
718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; 718 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; 719 WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; 720 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; 721 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON; 722 WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
723 WebRtcAec_PartitionDelay = PartitionDelayNEON; 723 WebRtcAec_PartitionDelay = PartitionDelayNEON;
724 WebRtcAec_WindowData = WindowDataNEON; 724 WebRtcAec_WindowData = WindowDataNEON;
725 } 725 }
726 } // namespace webrtc 726 } // namespace webrtc
OLD | NEW
« no previous file with comments | « webrtc/modules/audio_processing/aec/aec_core_mips.cc ('k') | webrtc/modules/audio_processing/aec/aec_core_sse2.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698