webrtc/modules/audio_processing/aecm/aecm_core_mips.c - Issue 1857153002: Changed AECM to be built using C++

Unified Diff: webrtc/modules/audio_processing/aecm/aecm_core_mips.c

Issue 1857153002: Changed AECM to be built using C++ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Added external declaration to the delay estimator wrapper inclusion Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: webrtc/modules/audio_processing/aecm/aecm_core_mips.c

diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c

deleted file mode 100644

index 3ca9982ebfa4321716687d73d2c056add62f133d..0000000000000000000000000000000000000000

--- a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c

+++ /dev/null

@@ -1,1566 +0,0 @@

-/*

- *

- * Use of this source code is governed by a BSD-style license

- * that can be found in the LICENSE file in the root of the source

- * tree. An additional intellectual property rights grant can be found

- * in the file PATENTS. All contributing project authors may

- * be found in the AUTHORS file in the root of the source tree.

- */

-#include "webrtc/modules/audio_processing/aecm/aecm_core.h"

-#include <assert.h>

-#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h"

-#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"

-static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {

- 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,

- 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,

- 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,

- 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,

- 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,

- 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,

- 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,

- 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384

-};

-static const int16_t kNoiseEstQDomain = 15;

-static const int16_t kNoiseEstIncCount = 5;

-static int16_t coefTable[] = {

- 0, 4, 256, 260, 128, 132, 384, 388,

- 64, 68, 320, 324, 192, 196, 448, 452,

- 32, 36, 288, 292, 160, 164, 416, 420,

- 96, 100, 352, 356, 224, 228, 480, 484,

- 16, 20, 272, 276, 144, 148, 400, 404,

- 80, 84, 336, 340, 208, 212, 464, 468,

- 48, 52, 304, 308, 176, 180, 432, 436,

- 112, 116, 368, 372, 240, 244, 496, 500,

- 8, 12, 264, 268, 136, 140, 392, 396,

- 72, 76, 328, 332, 200, 204, 456, 460,

- 40, 44, 296, 300, 168, 172, 424, 428,

- 104, 108, 360, 364, 232, 236, 488, 492,

- 24, 28, 280, 284, 152, 156, 408, 412,

- 88, 92, 344, 348, 216, 220, 472, 476,

- 56, 60, 312, 316, 184, 188, 440, 444,

- 120, 124, 376, 380, 248, 252, 504, 508

-};

-static int16_t coefTable_ifft[] = {

- 0, 512, 256, 508, 128, 252, 384, 380,

- 64, 124, 320, 444, 192, 188, 448, 316,

- 32, 60, 288, 476, 160, 220, 416, 348,

- 96, 92, 352, 412, 224, 156, 480, 284,

- 16, 28, 272, 492, 144, 236, 400, 364,

- 80, 108, 336, 428, 208, 172, 464, 300,

- 48, 44, 304, 460, 176, 204, 432, 332,

- 112, 76, 368, 396, 240, 140, 496, 268,

- 8, 12, 264, 500, 136, 244, 392, 372,

- 72, 116, 328, 436, 200, 180, 456, 308,

- 40, 52, 296, 468, 168, 212, 424, 340,

- 104, 84, 360, 404, 232, 148, 488, 276,

- 24, 20, 280, 484, 152, 228, 408, 356,

- 88, 100, 344, 420, 216, 164, 472, 292,

- 56, 36, 312, 452, 184, 196, 440, 324,

- 120, 68, 376, 388, 248, 132, 504, 260

-};

-static void ComfortNoise(AecmCore* aecm,

- const uint16_t* dfa,

- ComplexInt16* out,

- const int16_t* lambda);

-static void WindowAndFFT(AecmCore* aecm,

- int16_t* fft,

- const int16_t* time_signal,

- ComplexInt16* freq_signal,

- int time_signal_scaling) {

- int i, j;

- int32_t tmp1, tmp2, tmp3, tmp4;

- int16_t* pfrfi;

- ComplexInt16* pfreq_signal;

- int16_t f_coef, s_coef;

- int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;

- int32_t hann, hann1, coefs;

- memset(fft, 0, sizeof(int16_t) * PART_LEN4);

- // FFT of signal

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "addiu %[shift], %[time_signal_scaling], -14 \n\t"

- "addiu %[i], $zero, 64 \n\t"

- "addiu %[load_ptr], %[time_signal], 0 \n\t"

- "addiu %[hann], %[hanning], 0 \n\t"

- "addiu %[hann1], %[hanning], 128 \n\t"

- "addiu %[coefs], %[coefTable], 0 \n\t"

- "bltz %[shift], 2f \n\t"

- " negu %[shift1], %[shift] \n\t"

- "1: \n\t"

- "lh %[tmp1], 0(%[load_ptr]) \n\t"

- "lh %[tmp2], 0(%[hann]) \n\t"

- "lh %[tmp3], 128(%[load_ptr]) \n\t"

- "lh %[tmp4], 0(%[hann1]) \n\t"

- "addiu %[i], %[i], -1 \n\t"

- "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

- "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

- "lh %[f_coef], 0(%[coefs]) \n\t"

- "lh %[s_coef], 2(%[coefs]) \n\t"

- "addiu %[load_ptr], %[load_ptr], 2 \n\t"

- "addiu %[hann], %[hann], 2 \n\t"

- "addiu %[hann1], %[hann1], -2 \n\t"

- "addu %[store_ptr1], %[fft], %[f_coef] \n\t"

- "addu %[store_ptr2], %[fft], %[s_coef] \n\t"

- "sllv %[tmp1], %[tmp1], %[shift] \n\t"

- "sllv %[tmp3], %[tmp3], %[shift] \n\t"

- "sh %[tmp1], 0(%[store_ptr1]) \n\t"

- "sh %[tmp3], 0(%[store_ptr2]) \n\t"

- "bgtz %[i], 1b \n\t"

- " addiu %[coefs], %[coefs], 4 \n\t"

- "b 3f \n\t"

- " nop \n\t"

- "2: \n\t"

- "lh %[tmp1], 0(%[load_ptr]) \n\t"

- "lh %[tmp2], 0(%[hann]) \n\t"

- "lh %[tmp3], 128(%[load_ptr]) \n\t"

- "lh %[tmp4], 0(%[hann1]) \n\t"

- "addiu %[i], %[i], -1 \n\t"

- "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

- "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

- "lh %[f_coef], 0(%[coefs]) \n\t"

- "lh %[s_coef], 2(%[coefs]) \n\t"

- "addiu %[load_ptr], %[load_ptr], 2 \n\t"

- "addiu %[hann], %[hann], 2 \n\t"

- "addiu %[hann1], %[hann1], -2 \n\t"

- "addu %[store_ptr1], %[fft], %[f_coef] \n\t"

- "addu %[store_ptr2], %[fft], %[s_coef] \n\t"

- "srav %[tmp1], %[tmp1], %[shift1] \n\t"

- "srav %[tmp3], %[tmp3], %[shift1] \n\t"

- "sh %[tmp1], 0(%[store_ptr1]) \n\t"

- "sh %[tmp3], 0(%[store_ptr2]) \n\t"

- "bgtz %[i], 2b \n\t"

- " addiu %[coefs], %[coefs], 4 \n\t"

- "3: \n\t"

- ".set pop \n\t"

- : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),

- [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),

- [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),

- [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),

- [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),

- [store_ptr2] "=&r" (store_ptr2)

- : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),

- [time_signal_scaling] "r" (time_signal_scaling),

- [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)

- : "memory", "hi", "lo"

- );

- WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);

- pfrfi = fft;

- pfreq_signal = freq_signal;

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "addiu %[j], $zero, 128 \n\t"

- "1: \n\t"

- "lh %[tmp1], 0(%[pfrfi]) \n\t"

- "lh %[tmp2], 2(%[pfrfi]) \n\t"

- "lh %[tmp3], 4(%[pfrfi]) \n\t"

- "lh %[tmp4], 6(%[pfrfi]) \n\t"

- "subu %[tmp2], $zero, %[tmp2] \n\t"

- "sh %[tmp1], 0(%[pfreq_signal]) \n\t"

- "sh %[tmp2], 2(%[pfreq_signal]) \n\t"

- "subu %[tmp4], $zero, %[tmp4] \n\t"

- "sh %[tmp3], 4(%[pfreq_signal]) \n\t"

- "sh %[tmp4], 6(%[pfreq_signal]) \n\t"

- "lh %[tmp1], 8(%[pfrfi]) \n\t"

- "lh %[tmp2], 10(%[pfrfi]) \n\t"

- "lh %[tmp3], 12(%[pfrfi]) \n\t"

- "lh %[tmp4], 14(%[pfrfi]) \n\t"

- "addiu %[j], %[j], -8 \n\t"

- "subu %[tmp2], $zero, %[tmp2] \n\t"

- "sh %[tmp1], 8(%[pfreq_signal]) \n\t"

- "sh %[tmp2], 10(%[pfreq_signal]) \n\t"

- "subu %[tmp4], $zero, %[tmp4] \n\t"

- "sh %[tmp3], 12(%[pfreq_signal]) \n\t"

- "sh %[tmp4], 14(%[pfreq_signal]) \n\t"

- "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t"

- "bgtz %[j], 1b \n\t"

- " addiu %[pfrfi], %[pfrfi], 16 \n\t"

- ".set pop \n\t"

- : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),

- [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),

- [tmp4] "=&r" (tmp4)

- :

- : "memory"

- );

-static void InverseFFTAndWindow(AecmCore* aecm,

- int16_t* fft,

- ComplexInt16* efw,

- int16_t* output,

- const int16_t* nearendClean) {

- int i, outCFFT;

- int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;

- int16_t* pcoefTable_ifft = coefTable_ifft;

- int16_t* pfft = fft;

- int16_t* ppfft = fft;

- ComplexInt16* pefw = efw;

- int32_t out_aecm;

- int16_t* paecm_buf = aecm->outBuf;

- const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;

- const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];

- int16_t* output1 = output;

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "addiu %[i], $zero, 64 \n\t"

- "1: \n\t"

- "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp_re], 0(%[pefw]) \n\t"

- "lh %[tmp_im], 2(%[pefw]) \n\t"

- "addu %[pfft], %[fft], %[tmp2] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "addu %[pfft], %[fft], %[tmp1] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "subu %[tmp_im], $zero, %[tmp_im] \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp_re], 4(%[pefw]) \n\t"

- "lh %[tmp_im], 6(%[pefw]) \n\t"

- "addu %[pfft], %[fft], %[tmp2] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "addu %[pfft], %[fft], %[tmp1] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "subu %[tmp_im], $zero, %[tmp_im] \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp_re], 8(%[pefw]) \n\t"

- "lh %[tmp_im], 10(%[pefw]) \n\t"

- "addu %[pfft], %[fft], %[tmp2] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "addu %[pfft], %[fft], %[tmp1] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "subu %[tmp_im], $zero, %[tmp_im] \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t"

- "lh %[tmp_re], 12(%[pefw]) \n\t"

- "lh %[tmp_im], 14(%[pefw]) \n\t"

- "addu %[pfft], %[fft], %[tmp2] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "addu %[pfft], %[fft], %[tmp1] \n\t"

- "sh %[tmp_re], 0(%[pfft]) \n\t"

- "subu %[tmp_im], $zero, %[tmp_im] \n\t"

- "sh %[tmp_im], 2(%[pfft]) \n\t"

- "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t"

- "addiu %[i], %[i], -4 \n\t"

- "bgtz %[i], 1b \n\t"

- " addiu %[pefw], %[pefw], 16 \n\t"

- ".set pop \n\t"

- : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

- [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),

- [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),

- [fft] "+r" (fft)

- :

- : "memory"

- );

- fft[2] = efw[PART_LEN].real;

- fft[3] = -efw[PART_LEN].imag;

- outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);

- pfft = fft;

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "addiu %[i], $zero, 128 \n\t"

- "1: \n\t"

- "lh %[tmp1], 0(%[ppfft]) \n\t"

- "lh %[tmp2], 4(%[ppfft]) \n\t"

- "lh %[tmp3], 8(%[ppfft]) \n\t"

- "lh %[tmp4], 12(%[ppfft]) \n\t"

- "addiu %[i], %[i], -4 \n\t"

- "sh %[tmp1], 0(%[pfft]) \n\t"

- "sh %[tmp2], 2(%[pfft]) \n\t"

- "sh %[tmp3], 4(%[pfft]) \n\t"

- "sh %[tmp4], 6(%[pfft]) \n\t"

- "addiu %[ppfft], %[ppfft], 16 \n\t"

- "bgtz %[i], 1b \n\t"

- " addiu %[pfft], %[pfft], 8 \n\t"

- ".set pop \n\t"

- : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

- [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),

- [ppfft] "+r" (ppfft)

- :

- : "memory"

- );

- pfft = fft;

- out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "addiu %[i], $zero, 64 \n\t"

- "11: \n\t"

- "lh %[tmp1], 0(%[pfft]) \n\t"

- "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t"

- "addiu %[i], %[i], -2 \n\t"

- "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

- "lh %[tmp3], 2(%[pfft]) \n\t"

- "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t"

- "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

- "addiu %[tmp1], %[tmp1], 8192 \n\t"

- "sra %[tmp1], %[tmp1], 14 \n\t"

- "addiu %[tmp3], %[tmp3], 8192 \n\t"

- "sra %[tmp3], %[tmp3], 14 \n\t"

- "bgez %[out_aecm], 1f \n\t"

- " negu %[tmp2], %[out_aecm] \n\t"

- "srav %[tmp1], %[tmp1], %[tmp2] \n\t"

- "b 2f \n\t"

- " srav %[tmp3], %[tmp3], %[tmp2] \n\t"

- "1: \n\t"

- "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"

- "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"

- "2: \n\t"

- "lh %[tmp4], 0(%[paecm_buf]) \n\t"

- "lh %[tmp2], 2(%[paecm_buf]) \n\t"

- "addu %[tmp3], %[tmp3], %[tmp2] \n\t"

- "addu %[tmp1], %[tmp1], %[tmp4] \n\t"

-#if defined(MIPS_DSP_R1_LE)

- "shll_s.w %[tmp1], %[tmp1], 16 \n\t"

- "sra %[tmp1], %[tmp1], 16 \n\t"

- "shll_s.w %[tmp3], %[tmp3], 16 \n\t"

- "sra %[tmp3], %[tmp3], 16 \n\t"

-#else // #if defined(MIPS_DSP_R1_LE)

- "sra %[tmp4], %[tmp1], 31 \n\t"

- "sra %[tmp2], %[tmp1], 15 \n\t"

- "beq %[tmp4], %[tmp2], 3f \n\t"

- " ori %[tmp2], $zero, 0x7fff \n\t"

- "xor %[tmp1], %[tmp2], %[tmp4] \n\t"

- "3: \n\t"

- "sra %[tmp2], %[tmp3], 31 \n\t"

- "sra %[tmp4], %[tmp3], 15 \n\t"

- "beq %[tmp2], %[tmp4], 4f \n\t"

- " ori %[tmp4], $zero, 0x7fff \n\t"

- "xor %[tmp3], %[tmp4], %[tmp2] \n\t"

- "4: \n\t"

-#endif // #if defined(MIPS_DSP_R1_LE)

- "sh %[tmp1], 0(%[pfft]) \n\t"

- "sh %[tmp1], 0(%[output1]) \n\t"

- "sh %[tmp3], 2(%[pfft]) \n\t"

- "sh %[tmp3], 2(%[output1]) \n\t"

- "lh %[tmp1], 128(%[pfft]) \n\t"

- "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t"

- "mul %[tmp1], %[tmp1], %[tmp2] \n\t"

- "lh %[tmp3], 130(%[pfft]) \n\t"

- "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t"

- "mul %[tmp3], %[tmp3], %[tmp4] \n\t"

- "sra %[tmp1], %[tmp1], 14 \n\t"

- "sra %[tmp3], %[tmp3], 14 \n\t"

- "bgez %[out_aecm], 5f \n\t"

- " negu %[tmp2], %[out_aecm] \n\t"

- "srav %[tmp3], %[tmp3], %[tmp2] \n\t"

- "b 6f \n\t"

- " srav %[tmp1], %[tmp1], %[tmp2] \n\t"

- "5: \n\t"

- "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"

- "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"

- "6: \n\t"

-#if defined(MIPS_DSP_R1_LE)

- "shll_s.w %[tmp1], %[tmp1], 16 \n\t"

- "sra %[tmp1], %[tmp1], 16 \n\t"

- "shll_s.w %[tmp3], %[tmp3], 16 \n\t"

- "sra %[tmp3], %[tmp3], 16 \n\t"

-#else // #if defined(MIPS_DSP_R1_LE)

- "sra %[tmp4], %[tmp1], 31 \n\t"

- "sra %[tmp2], %[tmp1], 15 \n\t"

- "beq %[tmp4], %[tmp2], 7f \n\t"

- " ori %[tmp2], $zero, 0x7fff \n\t"

- "xor %[tmp1], %[tmp2], %[tmp4] \n\t"

- "7: \n\t"

- "sra %[tmp2], %[tmp3], 31 \n\t"

- "sra %[tmp4], %[tmp3], 15 \n\t"

- "beq %[tmp2], %[tmp4], 8f \n\t"

- " ori %[tmp4], $zero, 0x7fff \n\t"

- "xor %[tmp3], %[tmp4], %[tmp2] \n\t"

- "8: \n\t"

-#endif // #if defined(MIPS_DSP_R1_LE)

- "sh %[tmp1], 0(%[paecm_buf]) \n\t"

- "sh %[tmp3], 2(%[paecm_buf]) \n\t"

- "addiu %[output1], %[output1], 4 \n\t"

- "addiu %[paecm_buf], %[paecm_buf], 4 \n\t"

- "addiu %[pfft], %[pfft], 4 \n\t"

- "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t"

- "bgtz %[i], 11b \n\t"

- " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t"

- ".set pop \n\t"

- : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),

- [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),

- [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),

- [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),

- [p_kSqrtHanning] "+r" (p_kSqrtHanning)

- : [out_aecm] "r" (out_aecm),

- [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)

- : "hi", "lo","memory"

- );

- // Copy the current block to the old position

- // (aecm->outBuf is shifted elsewhere)

- memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);

- memcpy(aecm->dBufNoisy,

- aecm->dBufNoisy + PART_LEN,

- sizeof(int16_t) * PART_LEN);

- if (nearendClean != NULL) {

- memcpy(aecm->dBufClean,

- aecm->dBufClean + PART_LEN,

- sizeof(int16_t) * PART_LEN);

- }

-void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,

- const uint16_t* far_spectrum,

- int32_t* echo_est,

- uint32_t* far_energy,

- uint32_t* echo_energy_adapt,

- uint32_t* echo_energy_stored) {

- int i;

- uint32_t par1 = (*far_energy);

- uint32_t par2 = (*echo_energy_adapt);

- uint32_t par3 = (*echo_energy_stored);

- int16_t* ch_stored_p = &(aecm->channelStored[0]);

- int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);

- uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));

- int32_t* echo_p = &(echo_est[0]);

- int32_t temp0, stored0, echo0, adept0, spectrum0;

- int32_t stored1, adept1, spectrum1, echo1, temp1;

- // Get energy for the delayed far end signal and estimated

- // echo using both stored and adapted channels.

- for (i = 0; i < PART_LEN; i+= 4) {

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "lh %[stored0], 0(%[ch_stored_p]) \n\t"

- "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"

- "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"

- "lh %[stored1], 2(%[ch_stored_p]) \n\t"

- "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"

- "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"

- "mul %[echo0], %[stored0], %[spectrum0] \n\t"

- "mul %[temp0], %[adept0], %[spectrum0] \n\t"

- "mul %[echo1], %[stored1], %[spectrum1] \n\t"

- "mul %[temp1], %[adept1], %[spectrum1] \n\t"

- "addu %[par1], %[par1], %[spectrum0] \n\t"

- "addu %[par1], %[par1], %[spectrum1] \n\t"

- "addiu %[echo_p], %[echo_p], 16 \n\t"

- "addu %[par3], %[par3], %[echo0] \n\t"

- "addu %[par2], %[par2], %[temp0] \n\t"

- "addu %[par3], %[par3], %[echo1] \n\t"

- "addu %[par2], %[par2], %[temp1] \n\t"

- "usw %[echo0], -16(%[echo_p]) \n\t"

- "usw %[echo1], -12(%[echo_p]) \n\t"

- "lh %[stored0], 4(%[ch_stored_p]) \n\t"

- "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"

- "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"

- "lh %[stored1], 6(%[ch_stored_p]) \n\t"

- "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"

- "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"

- "mul %[echo0], %[stored0], %[spectrum0] \n\t"

- "mul %[temp0], %[adept0], %[spectrum0] \n\t"

- "mul %[echo1], %[stored1], %[spectrum1] \n\t"

- "mul %[temp1], %[adept1], %[spectrum1] \n\t"

- "addu %[par1], %[par1], %[spectrum0] \n\t"

- "addu %[par1], %[par1], %[spectrum1] \n\t"

- "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"

- "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"

- "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"

- "addu %[par3], %[par3], %[echo0] \n\t"

- "addu %[par2], %[par2], %[temp0] \n\t"

- "addu %[par3], %[par3], %[echo1] \n\t"

- "addu %[par2], %[par2], %[temp1] \n\t"

- "usw %[echo0], -8(%[echo_p]) \n\t"

- "usw %[echo1], -4(%[echo_p]) \n\t"

- ".set pop \n\t"

- : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),

- [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),

- [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),

- [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),

- [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),

- [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),

- [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),

- [spectrum_p] "+r" (spectrum_p)

- :

- : "hi", "lo", "memory"

- );

- }

- echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],

- far_spectrum[PART_LEN]);

- par1 += (uint32_t)(far_spectrum[PART_LEN]);

- par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];

- par3 += (uint32_t)echo_est[PART_LEN];

- (*far_energy) = par1;

- (*echo_energy_adapt) = par2;

- (*echo_energy_stored) = par3;

-#if defined(MIPS_DSP_R1_LE)

-void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,

- const uint16_t* far_spectrum,

- int32_t* echo_est) {

- int i;

- int16_t* temp1;

- uint16_t* temp8;

- int32_t temp0, temp2, temp3, temp4, temp5, temp6;

- int32_t* temp7 = &(echo_est[0]);

- temp1 = &(aecm->channelStored[0]);

- temp8 = (uint16_t*)(&far_spectrum[0]);

- // During startup we store the channel every block.

- memcpy(aecm->channelStored, aecm->channelAdapt16,

- sizeof(int16_t) * PART_LEN1);

- // Recalculate echo estimate

- for (i = 0; i < PART_LEN; i += 4) {

- __asm __volatile (

- "ulw %[temp0], 0(%[temp8]) \n\t"

- "ulw %[temp2], 0(%[temp1]) \n\t"

- "ulw %[temp4], 4(%[temp8]) \n\t"

- "ulw %[temp5], 4(%[temp1]) \n\t"

- "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"

- "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"

- "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"

- "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"

- "addiu %[temp7], %[temp7], 16 \n\t"

- "addiu %[temp1], %[temp1], 8 \n\t"

- "addiu %[temp8], %[temp8], 8 \n\t"

- "sra %[temp3], %[temp3], 1 \n\t"

- "sra %[temp0], %[temp0], 1 \n\t"

- "sra %[temp6], %[temp6], 1 \n\t"

- "sra %[temp4], %[temp4], 1 \n\t"

- "usw %[temp3], -12(%[temp7]) \n\t"

- "usw %[temp0], -16(%[temp7]) \n\t"

- "usw %[temp6], -4(%[temp7]) \n\t"

- "usw %[temp4], -8(%[temp7]) \n\t"

- : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

- [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),

- [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)

- :

- : "hi", "lo", "memory"

- );

- }

- echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],

- far_spectrum[i]);

-void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) {

- int i;

- int32_t* temp3;

- int16_t* temp0;

- int32_t temp1, temp2, temp4, temp5;

- temp0 = &(aecm->channelStored[0]);

- temp3 = &(aecm->channelAdapt32[0]);

- // The stored channel has a significantly lower MSE than the adaptive one for

- // two consecutive calculations. Reset the adaptive channel.

- memcpy(aecm->channelAdapt16,

- aecm->channelStored,

- sizeof(int16_t) * PART_LEN1);

- // Restore the W32 channel

- for (i = 0; i < PART_LEN; i += 4) {

- __asm __volatile (

- "ulw %[temp1], 0(%[temp0]) \n\t"

- "ulw %[temp4], 4(%[temp0]) \n\t"

- "preceq.w.phl %[temp2], %[temp1] \n\t"

- "preceq.w.phr %[temp1], %[temp1] \n\t"

- "preceq.w.phl %[temp5], %[temp4] \n\t"

- "preceq.w.phr %[temp4], %[temp4] \n\t"

- "addiu %[temp0], %[temp0], 8 \n\t"

- "usw %[temp2], 4(%[temp3]) \n\t"

- "usw %[temp1], 0(%[temp3]) \n\t"

- "usw %[temp5], 12(%[temp3]) \n\t"

- "usw %[temp4], 8(%[temp3]) \n\t"

- "addiu %[temp3], %[temp3], 16 \n\t"

- : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),

- [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),

- [temp3] "+r" (temp3), [temp0] "+r" (temp0)

- :

- : "memory"

- );

- }

- aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16;

-#endif // #if defined(MIPS_DSP_R1_LE)

-// Transforms a time domain signal into the frequency domain, outputting the

-// complex valued signal, absolute value and sum of absolute values.

-//

-// time_signal [in] Pointer to time domain signal

-// freq_signal_real [out] Pointer to real part of frequency domain array

-// freq_signal_imag [out] Pointer to imaginary part of frequency domain

-// array

-// freq_signal_abs [out] Pointer to absolute value of frequency domain

-// array

-// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in

-// the frequency domain array

-// return value The Q-domain of current frequency values

-//

-static int TimeToFrequencyDomain(AecmCore* aecm,

- const int16_t* time_signal,

- ComplexInt16* freq_signal,

- uint16_t* freq_signal_abs,

- uint32_t* freq_signal_sum_abs) {

- int i = 0;

- int time_signal_scaling = 0;

- // In fft_buf, +16 for 32-byte alignment.

- int16_t fft_buf[PART_LEN4 + 16];

- int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);

- int16_t tmp16no1;

-#if !defined(MIPS_DSP_R2_LE)

- int32_t tmp32no1;

- int32_t tmp32no2;

- int16_t tmp16no2;

-#else

- int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;

- int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;

- int16_t* freqp;

- uint16_t* freqabsp;

- uint32_t freqt0, freqt1, freqt2, freqt3;

- uint32_t freqs;

-#endif

-#ifdef AECM_DYNAMIC_Q

- tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);

- time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);

-#endif

- WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);

- // Extract imaginary and real part,

- // calculate the magnitude for all frequency bins

- freq_signal[0].imag = 0;

- freq_signal[PART_LEN].imag = 0;

- freq_signal[PART_LEN].real = fft[PART_LEN2];

- freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);

- freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(

- freq_signal[PART_LEN].real);

- (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +

- (uint32_t)(freq_signal_abs[PART_LEN]);

-#if !defined(MIPS_DSP_R2_LE)

- for (i = 1; i < PART_LEN; i++) {

- if (freq_signal[i].real == 0)

- {

- freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(

- freq_signal[i].imag);

- }

- else if (freq_signal[i].imag == 0)

- {

- freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(

- freq_signal[i].real);

- }

- else

- {

- // Approximation for magnitude of complex fft output

- // magn = sqrt(real^2 + imag^2)

- //

- // The parameters alpha and beta are stored in Q15

- tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);

- tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);

- tmp32no1 = tmp16no1 * tmp16no1;

- tmp32no2 = tmp16no2 * tmp16no2;

- tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);

- tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);

- freq_signal_abs[i] = (uint16_t)tmp32no1;

- }

- (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];

- }

-#else // #if !defined(MIPS_DSP_R2_LE)

- freqs = (uint32_t)(freq_signal_abs[0]) +

- (uint32_t)(freq_signal_abs[PART_LEN]);

- freqp = &(freq_signal[1].real);

- __asm __volatile (

- "lw %[freqt0], 0(%[freqp]) \n\t"

- "lw %[freqt1], 4(%[freqp]) \n\t"

- "lw %[freqt2], 8(%[freqp]) \n\t"

- "mult $ac0, $zero, $zero \n\t"

- "mult $ac1, $zero, $zero \n\t"

- "mult $ac2, $zero, $zero \n\t"

- "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"

- "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"

- "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"

- "addiu %[freqp], %[freqp], 12 \n\t"

- "extr.w %[tmp32no20], $ac0, 1 \n\t"

- "extr.w %[tmp32no21], $ac1, 1 \n\t"

- "extr.w %[tmp32no22], $ac2, 1 \n\t"

- : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),

- [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),

- [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),

- [tmp32no22] "=r" (tmp32no22)

- :

- : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"

- );

- tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);

- tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);

- tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);

- freq_signal_abs[1] = (uint16_t)tmp32no10;

- freq_signal_abs[2] = (uint16_t)tmp32no11;

- freq_signal_abs[3] = (uint16_t)tmp32no12;

- freqs += (uint32_t)tmp32no10;

- freqs += (uint32_t)tmp32no11;

- freqs += (uint32_t)tmp32no12;

- freqabsp = &(freq_signal_abs[4]);

- for (i = 4; i < PART_LEN; i+=4)

- {

- __asm __volatile (

- "ulw %[freqt0], 0(%[freqp]) \n\t"

- "ulw %[freqt1], 4(%[freqp]) \n\t"

- "ulw %[freqt2], 8(%[freqp]) \n\t"

- "ulw %[freqt3], 12(%[freqp]) \n\t"

- "mult $ac0, $zero, $zero \n\t"

- "mult $ac1, $zero, $zero \n\t"

- "mult $ac2, $zero, $zero \n\t"

- "mult $ac3, $zero, $zero \n\t"

- "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"

- "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"

- "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"

- "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"

- "addiu %[freqp], %[freqp], 16 \n\t"

- "addiu %[freqabsp], %[freqabsp], 8 \n\t"

- "extr.w %[tmp32no20], $ac0, 1 \n\t"

- "extr.w %[tmp32no21], $ac1, 1 \n\t"

- "extr.w %[tmp32no22], $ac2, 1 \n\t"

- "extr.w %[tmp32no23], $ac3, 1 \n\t"

- : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),

- [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),

- [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),

- [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),

- [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)

- :

- : "memory", "hi", "lo", "$ac1hi", "$ac1lo",

- "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"

- );

- tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);

- tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);

- tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);

- tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);

- __asm __volatile (

- "sh %[tmp32no10], -8(%[freqabsp]) \n\t"

- "sh %[tmp32no11], -6(%[freqabsp]) \n\t"

- "sh %[tmp32no12], -4(%[freqabsp]) \n\t"

- "sh %[tmp32no13], -2(%[freqabsp]) \n\t"

- "addu %[freqs], %[freqs], %[tmp32no10] \n\t"

- "addu %[freqs], %[freqs], %[tmp32no11] \n\t"

- "addu %[freqs], %[freqs], %[tmp32no12] \n\t"

- "addu %[freqs], %[freqs], %[tmp32no13] \n\t"

- : [freqs] "+r" (freqs)

- : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),

- [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),

- [freqabsp] "r" (freqabsp)

- : "memory"

- );

- }

- (*freq_signal_sum_abs) = freqs;

-#endif

- return time_signal_scaling;

-int WebRtcAecm_ProcessBlock(AecmCore* aecm,

- const int16_t* farend,

- const int16_t* nearendNoisy,

- const int16_t* nearendClean,

- int16_t* output) {

- int i;

- uint32_t xfaSum;

- uint32_t dfaNoisySum;

- uint32_t dfaCleanSum;

- uint32_t echoEst32Gained;

- uint32_t tmpU32;

- int32_t tmp32no1;

- uint16_t xfa[PART_LEN1];

- uint16_t dfaNoisy[PART_LEN1];

- uint16_t dfaClean[PART_LEN1];

- uint16_t* ptrDfaClean = dfaClean;

- const uint16_t* far_spectrum_ptr = NULL;

- // 32 byte aligned buffers (with +8 or +16).

- int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.

- int32_t echoEst32_buf[PART_LEN1 + 8];

- int32_t dfw_buf[PART_LEN2 + 8];

- int32_t efw_buf[PART_LEN2 + 8];

- int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31);

- int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31);

- ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31);

- ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31);

- int16_t hnl[PART_LEN1];

- int16_t numPosCoef = 0;

- int delay;

- int16_t tmp16no1;

- int16_t tmp16no2;

- int16_t mu;

- int16_t supGain;

- int16_t zeros32, zeros16;

- int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;

- int far_q;

- int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;

- const int kMinPrefBand = 4;

- const int kMaxPrefBand = 24;

- int32_t avgHnl32 = 0;

- int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;

- int16_t* ptr;

- int16_t* ptr1;

- int16_t* er_ptr;

- int16_t* dr_ptr;

- ptr = &hnl[0];

- ptr1 = &hnl[0];

- er_ptr = &efw[0].real;

- dr_ptr = &dfw[0].real;

- // Determine startup state. There are three states:

- // (0) the first CONV_LEN blocks

- // (1) another CONV_LEN blocks

- // (2) the rest

- if (aecm->startupState < 2) {

- aecm->startupState = (aecm->totCount >= CONV_LEN) +

- (aecm->totCount >= CONV_LEN2);

- }

- // END: Determine startup state

- // Buffer near and far end signals

- memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);

- memcpy(aecm->dBufNoisy + PART_LEN,

- nearendNoisy,

- sizeof(int16_t) * PART_LEN);

- if (nearendClean != NULL) {

- memcpy(aecm->dBufClean + PART_LEN,

- nearendClean,

- sizeof(int16_t) * PART_LEN);

- }

- // Transform far end signal from time domain to frequency domain.

- far_q = TimeToFrequencyDomain(aecm,

- aecm->xBuf,

- dfw,

- xfa,

- &xfaSum);

- // Transform noisy near end signal from time domain to frequency domain.

- zerosDBufNoisy = TimeToFrequencyDomain(aecm,

- aecm->dBufNoisy,

- dfw,

- dfaNoisy,

- &dfaNoisySum);

- aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;

- aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;

- if (nearendClean == NULL) {

- ptrDfaClean = dfaNoisy;

- aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;

- aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;

- dfaCleanSum = dfaNoisySum;

- } else {

- // Transform clean near end signal from time domain to frequency domain.

- zerosDBufClean = TimeToFrequencyDomain(aecm,

- aecm->dBufClean,

- dfw,

- dfaClean,

- &dfaCleanSum);

- aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;

- aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;

- }

- // Get the delay

- // Save far-end history and estimate delay

- WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);

- if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,

- far_q) == -1) {

- return -1;

- }

- delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,

- dfaNoisy,

- PART_LEN1,

- zerosDBufNoisy);

- if (delay == -1) {

- return -1;

- }

- else if (delay == -2) {

- // If the delay is unknown, we assume zero.

- // NOTE: this will have to be adjusted if we ever add lookahead.

- delay = 0;

- }

- if (aecm->fixedDelay >= 0) {

- // Use fixed delay

- delay = aecm->fixedDelay;

- }

- // Get aligned far end spectrum

- far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);

- zerosXBuf = (int16_t) far_q;

- if (far_spectrum_ptr == NULL) {

- return -1;

- }

- // Calculate log(energy) and update energy threshold levels

- WebRtcAecm_CalcEnergies(aecm,

- far_spectrum_ptr,

- zerosXBuf,

- dfaNoisySum,

- echoEst32);

- // Calculate stepsize

- mu = WebRtcAecm_CalcStepSize(aecm);

- // Update counters

- aecm->totCount++;

- // This is the channel estimation algorithm.

- // It is base on NLMS but has a variable step length,

- // which was calculated above.

- WebRtcAecm_UpdateChannel(aecm,

- far_spectrum_ptr,

- zerosXBuf,

- dfaNoisy,

- mu,

- echoEst32);

- supGain = WebRtcAecm_CalcSuppressionGain(aecm);

- // Calculate Wiener filter hnl[]

- for (i = 0; i < PART_LEN1; i++) {

- // Far end signal through channel estimate in Q8

- // How much can we shift right to preserve resolution

- tmp32no1 = echoEst32[i] - aecm->echoFilt[i];

- aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;

- zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;

- zeros16 = WebRtcSpl_NormW16(supGain) + 1;

- if (zeros32 + zeros16 > 16) {

- // Multiplication is safe

- // Result in

- // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])

- echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],

- (uint16_t)supGain);

- resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;

- resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);

- } else {

- tmp16no1 = 17 - zeros32 - zeros16;

- resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -

- RESOLUTION_SUPGAIN;

- resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);

- if (zeros32 > tmp16no1) {

- echoEst32Gained = WEBRTC_SPL_UMUL_32_16(

- (uint32_t)aecm->echoFilt[i],

- supGain >> tmp16no1);

- } else {

- // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)

- echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;

- }

- zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);

- assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative.

- dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;

- if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {

- tmp16no1 = aecm->nearFilt[i] << zeros16;

- qDomainDiff = zeros16 - dfa_clean_q_domain_diff;

- tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;

- } else {

- tmp16no1 = dfa_clean_q_domain_diff < 0

- ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff

- : aecm->nearFilt[i] << dfa_clean_q_domain_diff;

- qDomainDiff = 0;

- tmp16no2 = ptrDfaClean[i];

- }

- tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);

- tmp16no2 = (int16_t)(tmp32no1 >> 4);

- tmp16no2 += tmp16no1;

- zeros16 = WebRtcSpl_NormW16(tmp16no2);

- if ((tmp16no2) & (-qDomainDiff > zeros16)) {

- aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;

- } else {

- aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff

- : tmp16no2 >> qDomainDiff;

- }

- // Wiener filter coefficients, resulting hnl in Q14

- if (echoEst32Gained == 0) {

- hnl[i] = ONE_Q14;

- numPosCoef++;

- } else if (aecm->nearFilt[i] == 0) {

- hnl[i] = 0;

- } else {

- // Multiply the suppression gain

- // Rounding

- echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);

- tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,

- (uint16_t)aecm->nearFilt[i]);

- // Current resolution is

- // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN

- // - max(0, 17 - zeros16 - zeros32))

- // Make sure we are in Q14

- tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);

- if (tmp32no1 > ONE_Q14) {

- hnl[i] = 0;

- } else if (tmp32no1 < 0) {

- hnl[i] = ONE_Q14;

- numPosCoef++;

- } else {

- // 1-echoEst/dfa

- hnl[i] = ONE_Q14 - (int16_t)tmp32no1;

- if (hnl[i] <= 0) {

- hnl[i] = 0;

- } else {

- numPosCoef++;

- }

- // Only in wideband. Prevent the gain in upper band from being larger than

- // in lower band.

- if (aecm->mult == 2) {

- // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause

- // speech distortion in double-talk.

- for (i = 0; i < (PART_LEN1 >> 3); i++) {

- __asm __volatile (

- "lh %[temp1], 0(%[ptr1]) \n\t"

- "lh %[temp2], 2(%[ptr1]) \n\t"

- "lh %[temp3], 4(%[ptr1]) \n\t"

- "lh %[temp4], 6(%[ptr1]) \n\t"

- "lh %[temp5], 8(%[ptr1]) \n\t"

- "lh %[temp6], 10(%[ptr1]) \n\t"

- "lh %[temp7], 12(%[ptr1]) \n\t"

- "lh %[temp8], 14(%[ptr1]) \n\t"

- "mul %[temp1], %[temp1], %[temp1] \n\t"

- "mul %[temp2], %[temp2], %[temp2] \n\t"

- "mul %[temp3], %[temp3], %[temp3] \n\t"

- "mul %[temp4], %[temp4], %[temp4] \n\t"

- "mul %[temp5], %[temp5], %[temp5] \n\t"

- "mul %[temp6], %[temp6], %[temp6] \n\t"

- "mul %[temp7], %[temp7], %[temp7] \n\t"

- "mul %[temp8], %[temp8], %[temp8] \n\t"

- "sra %[temp1], %[temp1], 14 \n\t"

- "sra %[temp2], %[temp2], 14 \n\t"

- "sra %[temp3], %[temp3], 14 \n\t"

- "sra %[temp4], %[temp4], 14 \n\t"

- "sra %[temp5], %[temp5], 14 \n\t"

- "sra %[temp6], %[temp6], 14 \n\t"

- "sra %[temp7], %[temp7], 14 \n\t"

- "sra %[temp8], %[temp8], 14 \n\t"

- "sh %[temp1], 0(%[ptr1]) \n\t"

- "sh %[temp2], 2(%[ptr1]) \n\t"

- "sh %[temp3], 4(%[ptr1]) \n\t"

- "sh %[temp4], 6(%[ptr1]) \n\t"

- "sh %[temp5], 8(%[ptr1]) \n\t"

- "sh %[temp6], 10(%[ptr1]) \n\t"

- "sh %[temp7], 12(%[ptr1]) \n\t"

- "sh %[temp8], 14(%[ptr1]) \n\t"

- "addiu %[ptr1], %[ptr1], 16 \n\t"

- : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

- [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),

- [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)

- :

- : "memory", "hi", "lo"

- );

- }

- for(i = 0; i < (PART_LEN1 & 7); i++) {

- __asm __volatile (

- "lh %[temp1], 0(%[ptr1]) \n\t"

- "mul %[temp1], %[temp1], %[temp1] \n\t"

- "sra %[temp1], %[temp1], 14 \n\t"

- "sh %[temp1], 0(%[ptr1]) \n\t"

- "addiu %[ptr1], %[ptr1], 2 \n\t"

- : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)

- :

- : "memory", "hi", "lo"

- );

- }

- for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {

- avgHnl32 += (int32_t)hnl[i];

- }

- assert(kMaxPrefBand - kMinPrefBand + 1 > 0);

- avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);

- for (i = kMaxPrefBand; i < PART_LEN1; i++) {

- if (hnl[i] > (int16_t)avgHnl32) {

- hnl[i] = (int16_t)avgHnl32;

- }

- // Calculate NLP gain, result is in Q14

- if (aecm->nlpFlag) {

- if (numPosCoef < 3) {

- for (i = 0; i < PART_LEN1; i++) {

- efw[i].real = 0;

- efw[i].imag = 0;

- hnl[i] = 0;

- }

- } else {

- for (i = 0; i < PART_LEN1; i++) {

-#if defined(MIPS_DSP_R1_LE)

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "lh %[temp1], 0(%[ptr]) \n\t"

- "lh %[temp2], 0(%[dr_ptr]) \n\t"

- "slti %[temp4], %[temp1], 0x4001 \n\t"

- "beqz %[temp4], 3f \n\t"

- " lh %[temp3], 2(%[dr_ptr]) \n\t"

- "slti %[temp5], %[temp1], 3277 \n\t"

- "bnez %[temp5], 2f \n\t"

- " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"

- "mul %[temp2], %[temp2], %[temp1] \n\t"

- "mul %[temp3], %[temp3], %[temp1] \n\t"

- "shra_r.w %[temp2], %[temp2], 14 \n\t"

- "shra_r.w %[temp3], %[temp3], 14 \n\t"

- "b 4f \n\t"

- " nop \n\t"

- "2: \n\t"

- "addu %[temp1], $zero, $zero \n\t"

- "addu %[temp2], $zero, $zero \n\t"

- "addu %[temp3], $zero, $zero \n\t"

- "b 1f \n\t"

- " nop \n\t"

- "3: \n\t"

- "addiu %[temp1], $0, 0x4000 \n\t"

- "1: \n\t"

- "sh %[temp1], 0(%[ptr]) \n\t"

- "4: \n\t"

- "sh %[temp2], 0(%[er_ptr]) \n\t"

- "sh %[temp3], 2(%[er_ptr]) \n\t"

- "addiu %[ptr], %[ptr], 2 \n\t"

- "addiu %[er_ptr], %[er_ptr], 4 \n\t"

- ".set pop \n\t"

- : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

- [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),

- [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)

- :

- : "memory", "hi", "lo"

- );

-#else

- __asm __volatile (

- ".set push \n\t"

- ".set noreorder \n\t"

- "lh %[temp1], 0(%[ptr]) \n\t"

- "lh %[temp2], 0(%[dr_ptr]) \n\t"

- "slti %[temp4], %[temp1], 0x4001 \n\t"

- "beqz %[temp4], 3f \n\t"

- " lh %[temp3], 2(%[dr_ptr]) \n\t"

- "slti %[temp5], %[temp1], 3277 \n\t"

- "bnez %[temp5], 2f \n\t"

- " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"

- "mul %[temp2], %[temp2], %[temp1] \n\t"

- "mul %[temp3], %[temp3], %[temp1] \n\t"

- "addiu %[temp2], %[temp2], 0x2000 \n\t"

- "addiu %[temp3], %[temp3], 0x2000 \n\t"

- "sra %[temp2], %[temp2], 14 \n\t"

- "sra %[temp3], %[temp3], 14 \n\t"

- "b 4f \n\t"

- " nop \n\t"

- "2: \n\t"

- "addu %[temp1], $zero, $zero \n\t"

- "addu %[temp2], $zero, $zero \n\t"

- "addu %[temp3], $zero, $zero \n\t"

- "b 1f \n\t"

- " nop \n\t"

- "3: \n\t"

- "addiu %[temp1], $0, 0x4000 \n\t"

- "1: \n\t"

- "sh %[temp1], 0(%[ptr]) \n\t"

- "4: \n\t"

- "sh %[temp2], 0(%[er_ptr]) \n\t"

- "sh %[temp3], 2(%[er_ptr]) \n\t"

- "addiu %[ptr], %[ptr], 2 \n\t"

- "addiu %[er_ptr], %[er_ptr], 4 \n\t"

- ".set pop \n\t"

- : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),

- [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),

- [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)

- :

- : "memory", "hi", "lo"

- );

-#endif

- }

- else {

- // multiply with Wiener coefficients

- for (i = 0; i < PART_LEN1; i++) {

- efw[i].real = (int16_t)

- (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,

- hnl[i],

- 14));

- efw[i].imag = (int16_t)

- (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,

- hnl[i],

- 14));

- }

- if (aecm->cngMode == AecmTrue) {

- ComfortNoise(aecm, ptrDfaClean, efw, hnl);

- }

- InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);

- return 0;

-// Generate comfort noise and add to output signal.

-static void ComfortNoise(AecmCore* aecm,

- const uint16_t* dfa,

- ComplexInt16* out,

- const int16_t* lambda) {

- int16_t i;

- int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;

- int32_t tmp32, tmp321, tnoise, tnoise1;

- int32_t tmp322, tmp323, *tmp1;

- int16_t* dfap;

- int16_t* lambdap;

- const int32_t c2049 = 2049;

- const int32_t c359 = 359;

- const int32_t c114 = ONE_Q14;

- int16_t randW16[PART_LEN];

- int16_t uReal[PART_LEN1];

- int16_t uImag[PART_LEN1];

- int32_t outLShift32;

- int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;

- int16_t minTrackShift = 9;

- assert(shiftFromNearToNoise >= 0);

- assert(shiftFromNearToNoise < 16);

- if (aecm->noiseEstCtr < 100) {

- // Track the minimum more quickly initially.

- aecm->noiseEstCtr++;

- minTrackShift = 6;

- }

- // Generate a uniform random array on [0 2^15-1].

- WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);

- int16_t* randW16p = (int16_t*)randW16;

-#if defined (MIPS_DSP_R1_LE)

- int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;

- int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;

-#endif // #if defined(MIPS_DSP_R1_LE)

- tmp1 = (int32_t*)aecm->noiseEst + 1;

- dfap = (int16_t*)dfa + 1;

- lambdap = (int16_t*)lambda + 1;

- // Estimate noise power.

- for (i = 1; i < PART_LEN1; i+=2) {

- // Shift to the noise domain.

- __asm __volatile (

- "lh %[tmp32], 0(%[dfap]) \n\t"

- "lw %[tnoise], 0(%[tmp1]) \n\t"

- "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"

- : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),

- [tnoise] "=&r" (tnoise)

- : [tmp1] "r" (tmp1), [dfap] "r" (dfap),

- [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

- : "memory"

- );

- if (outLShift32 < tnoise) {

- // Reset "too low" counter

- aecm->noiseEstTooLowCtr[i] = 0;

- // Track the minimum.

- if (tnoise < (1 << minTrackShift)) {

- // For small values, decrease noiseEst[i] every

- // |kNoiseEstIncCount| block. The regular approach below can not

- // go further down due to truncation.

- aecm->noiseEstTooHighCtr[i]++;

- if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {

- tnoise--;

- aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter

- }

- } else {

- __asm __volatile (

- "subu %[tmp32], %[tnoise], %[outLShift32] \n\t"

- "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"

- "subu %[tnoise], %[tnoise], %[tmp32] \n\t"

- : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)

- : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)

- );

- }

- } else {

- // Reset "too high" counter

- aecm->noiseEstTooHighCtr[i] = 0;

- // Ramp slowly upwards until we hit the minimum again.

- if ((tnoise >> 19) <= 0) {

- if ((tnoise >> 11) > 0) {

- // Large enough for relative increase

- __asm __volatile (

- "mul %[tnoise], %[tnoise], %[c2049] \n\t"

- "sra %[tnoise], %[tnoise], 11 \n\t"

- : [tnoise] "+r" (tnoise)

- : [c2049] "r" (c2049)

- : "hi", "lo"

- );

- } else {

- // Make incremental increases based on size every

- // |kNoiseEstIncCount| block

- aecm->noiseEstTooLowCtr[i]++;

- if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {

- __asm __volatile (

- "sra %[tmp32], %[tnoise], 9 \n\t"

- "addi %[tnoise], %[tnoise], 1 \n\t"

- "addu %[tnoise], %[tnoise], %[tmp32] \n\t"

- : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)

- :

- );

- aecm->noiseEstTooLowCtr[i] = 0; // Reset counter

- }

- } else {

- // Avoid overflow.

- // Multiplication with 2049 will cause wrap around. Scale

- // down first and then multiply

- __asm __volatile (

- "sra %[tnoise], %[tnoise], 11 \n\t"

- "mul %[tnoise], %[tnoise], %[c2049] \n\t"

- : [tnoise] "+r" (tnoise)

- : [c2049] "r" (c2049)

- : "hi", "lo"

- );

- }

- // Shift to the noise domain.

- __asm __volatile (

- "lh %[tmp32], 2(%[dfap]) \n\t"

- "lw %[tnoise1], 4(%[tmp1]) \n\t"

- "addiu %[dfap], %[dfap], 4 \n\t"

- "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"

- : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),

- [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)

- : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

- : "memory"

- );

- if (outLShift32 < tnoise1) {

- // Reset "too low" counter

- aecm->noiseEstTooLowCtr[i + 1] = 0;

- // Track the minimum.

- if (tnoise1 < (1 << minTrackShift)) {

- // For small values, decrease noiseEst[i] every

- // |kNoiseEstIncCount| block. The regular approach below can not

- // go further down due to truncation.

- aecm->noiseEstTooHighCtr[i + 1]++;

- if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {

- tnoise1--;

- aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter

- }

- } else {

- __asm __volatile (

- "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t"

- "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"

- "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t"

- : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)

- : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)

- );

- }

- } else {

- // Reset "too high" counter

- aecm->noiseEstTooHighCtr[i + 1] = 0;

- // Ramp slowly upwards until we hit the minimum again.

- if ((tnoise1 >> 19) <= 0) {

- if ((tnoise1 >> 11) > 0) {

- // Large enough for relative increase

- __asm __volatile (

- "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"

- "sra %[tnoise1], %[tnoise1], 11 \n\t"

- : [tnoise1] "+r" (tnoise1)

- : [c2049] "r" (c2049)

- : "hi", "lo"

- );

- } else {

- // Make incremental increases based on size every

- // |kNoiseEstIncCount| block

- aecm->noiseEstTooLowCtr[i + 1]++;

- if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {

- __asm __volatile (

- "sra %[tmp32], %[tnoise1], 9 \n\t"

- "addi %[tnoise1], %[tnoise1], 1 \n\t"

- "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t"

- : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)

- :

- );

- aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter

- }

- } else {

- // Avoid overflow.

- // Multiplication with 2049 will cause wrap around. Scale

- // down first and then multiply

- __asm __volatile (

- "sra %[tnoise1], %[tnoise1], 11 \n\t"

- "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"

- : [tnoise1] "+r" (tnoise1)

- : [c2049] "r" (c2049)

- : "hi", "lo"

- );

- }

- __asm __volatile (

- "lh %[tmp16], 0(%[lambdap]) \n\t"

- "lh %[tmp161], 2(%[lambdap]) \n\t"

- "sw %[tnoise], 0(%[tmp1]) \n\t"

- "sw %[tnoise1], 4(%[tmp1]) \n\t"

- "subu %[tmp16], %[c114], %[tmp16] \n\t"

- "subu %[tmp161], %[c114], %[tmp161] \n\t"

- "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t"

- "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t"

- "addiu %[lambdap], %[lambdap], 4 \n\t"

- "addiu %[tmp1], %[tmp1], 8 \n\t"

- : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),

- [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)

- : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),

- [shiftFromNearToNoise] "r" (shiftFromNearToNoise)

- : "memory"

- );

- if (tmp32 > 32767) {

- tmp32 = 32767;

- aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;

- }

- if (tmp321 > 32767) {

- tmp321 = 32767;

- aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise;

- }

- __asm __volatile (

- "mul %[tmp32], %[tmp32], %[tmp16] \n\t"

- "mul %[tmp321], %[tmp321], %[tmp161] \n\t"

- "sra %[nrsh1], %[tmp32], 14 \n\t"

- "sra %[nrsh2], %[tmp321], 14 \n\t"

- : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2)

- : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32),

- [tmp321] "r" (tmp321)

- : "memory", "hi", "lo"

- );

- __asm __volatile (

- "lh %[tmp32], 0(%[randW16p]) \n\t"

- "lh %[tmp321], 2(%[randW16p]) \n\t"

- "addiu %[randW16p], %[randW16p], 4 \n\t"

- "mul %[tmp32], %[tmp32], %[c359] \n\t"

- "mul %[tmp321], %[tmp321], %[c359] \n\t"

- "sra %[tmp16], %[tmp32], 15 \n\t"

- "sra %[tmp161], %[tmp321], 15 \n\t"

- : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),

- [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)

- : [c359] "r" (c359)

- : "memory", "hi", "lo"

- );

-#if !defined(MIPS_DSP_R1_LE)

- tmp32 = WebRtcAecm_kCosTable[tmp16];

- tmp321 = WebRtcAecm_kSinTable[tmp16];

- tmp322 = WebRtcAecm_kCosTable[tmp161];

- tmp323 = WebRtcAecm_kSinTable[tmp161];

-#else

- __asm __volatile (

- "sll %[tmp16], %[tmp16], 1 \n\t"

- "sll %[tmp161], %[tmp161], 1 \n\t"

- "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"

- "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"

- "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"

- "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"

- : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),

- [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323)

- : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16),

- [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep)

- : "memory"

- );

-#endif

- __asm __volatile (

- "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"

- "negu %[tmp162], %[nrsh1] \n\t"

- "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"

- "negu %[tmp163], %[nrsh2] \n\t"

- "sra %[tmp32], %[tmp32], 13 \n\t"

- "mul %[tmp321], %[tmp321], %[tmp162] \n\t"

- "sra %[tmp322], %[tmp322], 13 \n\t"

- "mul %[tmp323], %[tmp323], %[tmp163] \n\t"

- "sra %[tmp321], %[tmp321], 13 \n\t"

- "sra %[tmp323], %[tmp323], 13 \n\t"

- : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),

- [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)

- : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)

- : "hi", "lo"

- );

- // Tables are in Q13.

- uReal[i] = (int16_t)tmp32;

- uImag[i] = (int16_t)tmp321;

- uReal[i + 1] = (int16_t)tmp322;

- uImag[i + 1] = (int16_t)tmp323;

- }

- int32_t tt, sgn;

- tt = out[0].real;

- sgn = ((int)tt) >> 31;

- out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

- tt = out[0].imag;

- sgn = ((int)tt) >> 31;

- out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

- for (i = 1; i < PART_LEN; i++) {

- tt = out[i].real + uReal[i];

- sgn = ((int)tt) >> 31;

- out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

- tt = out[i].imag + uImag[i];

- sgn = ((int)tt) >> 31;

- out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

- }

- tt = out[PART_LEN].real + uReal[PART_LEN];

- sgn = ((int)tt) >> 31;

- out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

- tt = out[PART_LEN].imag;

- sgn = ((int)tt) >> 31;

- out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);

« no previous file with comments | « webrtc/modules/audio_processing/aecm/aecm_core_c.cc ('k') | webrtc/modules/audio_processing/aecm/aecm_core_mips.cc » ('j') | no next file with comments »