webrtc/modules/audio_processing/aec/aec_core.c - Issue 1492343002: Echo Suppressor code simplification using the InverseFft function (#5)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core.c

Issue 1492343002: Echo Suppressor code simplification using the InverseFft function (#5) (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ESUP_refactoring4_CL

Patch Set: Changed the scaling of the ScaledInverseFft Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 26 matching lines...) Expand all Loading...
37	37

38 // Buffer size (samples)	38 // Buffer size (samples)

39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz.	39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz.

40	40

41 // Metrics	41 // Metrics

42 static const int subCountLen = 4;	42 static const int subCountLen = 4;

43 static const int countLen = 50;	43 static const int countLen = 50;

44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.	44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.

45	45

46 // Quantities to control H band scaling for SWB input	46 // Quantities to control H band scaling for SWB input

47 static const int flagHbandCn = 1; // flag for adding comfort noise in H band

48 static const float cnScaleHband =	47 static const float cnScaleHband =

49 (float)0.4; // scale for comfort noise in H band	48 (float)0.4; // scale for comfort noise in H band

50 // Initial bin for averaging nlp gain in low band	49 // Initial bin for averaging nlp gain in low band

51 static const int freqAvgIc = PART_LEN / 2;	50 static const int freqAvgIc = PART_LEN / 2;

52	51

53 // Matlab code to produce table:	52 // Matlab code to produce table:

54 // win = sqrt(hanning(63)); win = [0 ; win(1:32)];	53 // win = sqrt(hanning(63)); win = [0 ; win(1:32)];

55 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);	54 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);

56 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {	55 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {

57 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,	56 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,

(...skipping 418 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
476 // tmp = 1 - lambda[i];	475 // tmp = 1 - lambda[i];

477 efw[0][i] += tmp * u[0][i];	476 efw[0][i] += tmp * u[0][i];

478 efw[1][i] += tmp * u[1][i];	477 efw[1][i] += tmp * u[1][i];

479 }	478 }

480	479

481 // For H band comfort noise	480 // For H band comfort noise

482 // TODO: don't compute noise and "tmp" twice. Use the previous results.	481 // TODO: don't compute noise and "tmp" twice. Use the previous results.

483 noiseAvg = 0.0;	482 noiseAvg = 0.0;

484 tmpAvg = 0.0;	483 tmpAvg = 0.0;

485 num = 0;	484 num = 0;

486 if (aec->num_bands > 1 && flagHbandCn == 1) {	485 if (aec->num_bands > 1) {

487	486

488 // average noise scale	487 // average noise scale

489 // average over second half of freq spectrum (i.e., 4->8khz)	488 // average over second half of freq spectrum (i.e., 4->8khz)

490 // TODO: we shouldn't need num. We know how many elements we're summing.	489 // TODO: we shouldn't need num. We know how many elements we're summing.

491 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {	490 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {

492 num++;	491 num++;

493 noiseAvg += sqrtf(noisePow[i]);	492 noiseAvg += sqrtf(noisePow[i]);

494 }	493 }

495 noiseAvg /= (float)num;	494 noiseAvg /= (float)num;

496	495

(...skipping 310 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
807 self->num_delay_values;	806 self->num_delay_values;

808 }	807 }

809	808

810 // Reset histogram.	809 // Reset histogram.

811 memset(self->delay_histogram, 0, sizeof(self->delay_histogram));	810 memset(self->delay_histogram, 0, sizeof(self->delay_histogram));

812 self->num_delay_values = 0;	811 self->num_delay_values = 0;

813	812

814 return;	813 return;

815 }	814 }

816	815

817 static void InverseFft(float freq_data[2][PART_LEN1],	816 static void ScaledInverseFft(float freq_data[2][PART_LEN1],

818 float time_data[PART_LEN2]) {	817 float time_data[PART_LEN2],

	818 float scale,

	819 int conjugate) {

819 int i;	820 int i;

820 const float scale = 1.0f / PART_LEN2;	821 const float normalization = scale / ((float)PART_LEN2);

821 time_data[0] = freq_data[0][0] * scale;	822 const float sign = (conjugate ? -1 : 1);

822 time_data[1] = freq_data[0][PART_LEN] * scale;	823 time_data[0] = freq_data[0][0] * normalization;

	824 time_data[1] = freq_data[0][PART_LEN] * normalization;

823 for (i = 1; i < PART_LEN; i++) {	825 for (i = 1; i < PART_LEN; i++) {

824 time_data[2 * i] = freq_data[0][i] * scale;	826 time_data[2 * i] = freq_data[0][i] * normalization;

825 time_data[2 * i + 1] = freq_data[1][i] * scale;	827 time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;

826 }	828 }

827 aec_rdft_inverse_128(time_data);	829 aec_rdft_inverse_128(time_data);

828 }	830 }

829	831

830	832

831 static void Fft(float time_data[PART_LEN2],	833 static void Fft(float time_data[PART_LEN2],

832 float freq_data[2][PART_LEN1]) {	834 float freq_data[2][PART_LEN1]) {

833 int i;	835 int i;

834 aec_rdft_forward_128(time_data);	836 aec_rdft_forward_128(time_data);

835	837

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
956 }	958 }

957	959

958 // Produce echo estimate s_fft.	960 // Produce echo estimate s_fft.

959 WebRtcAec_FilterFar(num_partitions,	961 WebRtcAec_FilterFar(num_partitions,

960 x_fft_buf_block_pos,	962 x_fft_buf_block_pos,

961 x_fft_buf,	963 x_fft_buf,

962 h_fft_buf,	964 h_fft_buf,

963 s_fft);	965 s_fft);

964	966

965 // Compute the time-domain echo estimate s.	967 // Compute the time-domain echo estimate s.

966 InverseFft(s_fft, s_extended);	968 ScaledInverseFft(s_fft, s_extended, 2.0f, 0);

967 s = &s_extended[PART_LEN];	969 s = &s_extended[PART_LEN];

968 for (i = 0; i < PART_LEN; ++i) {

969 s[i] *= 2.0f;

970 }

971	970

972 // Compute the time-domain echo prediction error.	971 // Compute the time-domain echo prediction error.

973 for (i = 0; i < PART_LEN; ++i) {	972 for (i = 0; i < PART_LEN; ++i) {

974 e[i] = y[i] - s[i];	973 e[i] = y[i] - s[i];

975 }	974 }

976	975

977 // Compute the frequency domain echo prediction error.	976 // Compute the frequency domain echo prediction error.

978 memset(e_extended, 0, sizeof(float) * PART_LEN);	977 memset(e_extended, 0, sizeof(float) * PART_LEN);

979 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);	978 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);

980 Fft(e_extended, e_fft);	979 Fft(e_extended, e_fft);

(...skipping 26 matching lines...) Expand all Loading...
1007	1006

1008 static void EchoSuppression(AecCore* aec,	1007 static void EchoSuppression(AecCore* aec,

1009 float* echo_subtractor_output,	1008 float* echo_subtractor_output,

1010 float* output,	1009 float* output,

1011 float* const* outputH) {	1010 float* const* outputH) {

1012 float efw[2][PART_LEN1];	1011 float efw[2][PART_LEN1];

1013 float xfw[2][PART_LEN1];	1012 float xfw[2][PART_LEN1];

1014 float dfw[2][PART_LEN1];	1013 float dfw[2][PART_LEN1];

1015 float comfortNoiseHband[2][PART_LEN1];	1014 float comfortNoiseHband[2][PART_LEN1];

1016 float fft[PART_LEN2];	1015 float fft[PART_LEN2];

1017 float scale, dtmp;

1018 float nlpGainHband;	1016 float nlpGainHband;

1019 int i;	1017 int i;

1020 size_t j;	1018 size_t j;

1021	1019

1022 // Coherence and non-linear filter	1020 // Coherence and non-linear filter

1023 float cohde[PART_LEN1], cohxd[PART_LEN1];	1021 float cohde[PART_LEN1], cohxd[PART_LEN1];

1024 float hNlDeAvg, hNlXdAvg;	1022 float hNlDeAvg, hNlXdAvg;

1025 float hNl[PART_LEN1];	1023 float hNl[PART_LEN1];

1026 float hNlPref[kPrefBandSize];	1024 float hNlPref[kPrefBandSize];

1027 float hNlFb = 0, hNlFbLow = 0;	1025 float hNlFb = 0, hNlFbLow = 0;

(...skipping 19 matching lines...) Expand all Loading...
1047 // Windowed near-end ffts.	1045 // Windowed near-end ffts.

1048 WindowData(fft, aec->dBuf);	1046 WindowData(fft, aec->dBuf);

1049 aec_rdft_forward_128(fft);	1047 aec_rdft_forward_128(fft);

1050 StoreAsComplex(fft, dfw);	1048 StoreAsComplex(fft, dfw);

1051	1049

1052 // Windowed echo suppressor output ffts.	1050 // Windowed echo suppressor output ffts.

1053 WindowData(fft, aec->eBuf);	1051 WindowData(fft, aec->eBuf);

1054 aec_rdft_forward_128(fft);	1052 aec_rdft_forward_128(fft);

1055 StoreAsComplex(fft, efw);	1053 StoreAsComplex(fft, efw);

1056	1054

1057 aec->delayEstCtr++;

1058 if (aec->delayEstCtr == delayEstInterval) {

1059 aec->delayEstCtr = 0;

1060 }

1061

1062 // We should always have at least one element stored in \|far_buf\|.	1055 // We should always have at least one element stored in \|far_buf\|.

1063 assert(WebRtc_available_read(aec->far_buf_windowed) > 0);	1056 assert(WebRtc_available_read(aec->far_buf_windowed) > 0);

1064 // NLP	1057 // NLP

1065 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);	1058 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);

1066	1059

1067 // TODO(bjornv): Investigate if we can reuse \|far_buf_windowed\| instead of	1060 // TODO(bjornv): Investigate if we can reuse \|far_buf_windowed\| instead of

1068 // \|xfwBuf\|.	1061 // \|xfwBuf\|.

1069 // Buffer far.	1062 // Buffer far.

1070 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);	1063 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

1071	1064

1072 if (aec->delayEstCtr == 0)	1065 aec->delayEstCtr++;

	1066 if (aec->delayEstCtr == delayEstInterval) {

	1067 aec->delayEstCtr = 0;

1073 aec->delayIdx = WebRtcAec_PartitionDelay(aec);	1068 aec->delayIdx = WebRtcAec_PartitionDelay(aec);

	1069 }

1074	1070

1075 // Use delayed far.	1071 // Use delayed far.

1076 memcpy(xfw,	1072 memcpy(xfw,

1077 aec->xfwBuf + aec->delayIdx * PART_LEN1,	1073 aec->xfwBuf + aec->delayIdx * PART_LEN1,

1078 sizeof(xfw[0][0]) * 2 * PART_LEN1);	1074 sizeof(xfw[0][0]) * 2 * PART_LEN1);

1079	1075

1080 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,	1076 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,

1081 &aec->extreme_filter_divergence);	1077 &aec->extreme_filter_divergence);

1082	1078

1083 // Select the microphone signal as output if the filter is deemed to have	1079 // Select the microphone signal as output if the filter is deemed to have

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1183	1179

1184 // TODO(bjornv): Investigate how to take the windowing below into account if	1180 // TODO(bjornv): Investigate how to take the windowing below into account if

1185 // needed.	1181 // needed.

1186 if (aec->metricsMode == 1) {	1182 if (aec->metricsMode == 1) {

1187 // Note that we have a scaling by two in the time domain \|eBuf\|.	1183 // Note that we have a scaling by two in the time domain \|eBuf\|.

1188 // In addition the time domain signal is windowed before transformation,	1184 // In addition the time domain signal is windowed before transformation,

1189 // losing half the energy on the average. We take care of the first	1185 // losing half the energy on the average. We take care of the first

1190 // scaling only in UpdateMetrics().	1186 // scaling only in UpdateMetrics().

1191 UpdateLevel(&aec->nlpoutlevel, efw);	1187 UpdateLevel(&aec->nlpoutlevel, efw);

1192 }	1188 }

	1189

1193 // Inverse error fft.	1190 // Inverse error fft.

1194 fft[0] = efw[0][0];	1191 ScaledInverseFft(efw, fft, 2.0f, 1);

1195 fft[1] = efw[0][PART_LEN];

1196 for (i = 1; i < PART_LEN; i++) {

1197 fft[2 * i] = efw[0][i];

1198 // Sign change required by Ooura fft.

1199 fft[2 * i + 1] = -efw[1][i];

1200 }

1201 aec_rdft_inverse_128(fft);

1202	1192

1203 // Overlap and add to obtain output.	1193 // Overlap and add to obtain output.

1204 scale = 2.0f / PART_LEN2;

1205 for (i = 0; i < PART_LEN; i++) {	1194 for (i = 0; i < PART_LEN; i++) {

1206 fft[i] *= scale; // fft scaling	1195 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +

1207 fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];	1196 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);

1208

1209 fft[PART_LEN + i] *= scale; // fft scaling

1210 aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];

1211	1197

1212 // Saturate output to keep it in the allowed range.	1198 // Saturate output to keep it in the allowed range.

1213 output[i] = WEBRTC_SPL_SAT(	1199 output[i] = WEBRTC_SPL_SAT(

1214 WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);	1200 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);

1215 }	1201 }

	1202 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));

1216	1203

1217 // For H band	1204 // For H band

1218 if (aec->num_bands > 1) {	1205 if (aec->num_bands > 1) {

1219

1220 // H band gain	1206 // H band gain

1221 // average nlp over low band: average over second half of freq spectrum	1207 // average nlp over low band: average over second half of freq spectrum

1222 // (4->8khz)	1208 // (4->8khz)

1223 GetHighbandGain(hNl, &nlpGainHband);	1209 GetHighbandGain(hNl, &nlpGainHband);

1224	1210

1225 // Inverse comfort_noise	1211 // Inverse comfort_noise

1226 if (flagHbandCn == 1) {	1212 ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);

1227 fft[0] = comfortNoiseHband[0][0];

1228 fft[1] = comfortNoiseHband[0][PART_LEN];

1229 for (i = 1; i < PART_LEN; i++) {

1230 fft[2 * i] = comfortNoiseHband[0][i];

1231 fft[2 * i + 1] = comfortNoiseHband[1][i];

1232 }

1233 aec_rdft_inverse_128(fft);

1234 scale = 2.0f / PART_LEN2;

1235 }

1236	1213

1237 // compute gain factor	1214 // compute gain factor

1238 for (j = 0; j < aec->num_bands - 1; ++j) {	1215 for (j = 0; j < aec->num_bands - 1; ++j) {

1239 for (i = 0; i < PART_LEN; i++) {	1216 for (i = 0; i < PART_LEN; i++) {

1240 dtmp = aec->dBufH[j][i];	1217 outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;

1241 dtmp = dtmp * nlpGainHband; // for variable gain

1242

1243 // add some comfort noise where Hband is attenuated

1244 if (flagHbandCn == 1 && j == 0) {

1245 fft[i] *= scale; // fft scaling

1246 dtmp += cnScaleHband * fft[i];

1247 }

1248

1249 // Saturate output to keep it in the allowed range.

1250 outputH[j][i] = WEBRTC_SPL_SAT(

1251 WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);

1252 }	1218 }

1253 }	1219 }

	1220

	1221 // Add some comfort noise where Hband is attenuated.

	1222 for (i = 0; i < PART_LEN; i++) {

	1223 outputH[0][i] += cnScaleHband * fft[i];

	1224 }

	1225

	1226 // Saturate output to keep it in the allowed range.

	1227 for (j = 0; j < aec->num_bands - 1; ++j) {

	1228 for (i = 0; i < PART_LEN; i++) {

	1229 outputH[j][i] = WEBRTC_SPL_SAT(

	1230 WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN);

	1231 }

	1232 }

	1233

1254 }	1234 }

1255	1235

1256 // Copy the current block to the old position.	1236 // Copy the current block to the old position.

1257 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);	1237 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);

1258 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);	1238 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);

1259	1239

1260 // Copy the current block to the old position for H band	1240 // Copy the current block to the old position for H band

1261 for (j = 0; j < aec->num_bands - 1; ++j) {	1241 for (j = 0; j < aec->num_bands - 1; ++j) {

1262 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);	1242 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);

1263 }	1243 }

(...skipping 717 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1981 int WebRtcAec_extended_filter_enabled(AecCore* self) {	1961 int WebRtcAec_extended_filter_enabled(AecCore* self) {

1982 return self->extended_filter_enabled;	1962 return self->extended_filter_enabled;

1983 }	1963 }

1984	1964

1985 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }	1965 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }

1986	1966

1987 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {	1967 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {

1988 assert(delay >= 0);	1968 assert(delay >= 0);

1989 self->system_delay = delay;	1969 self->system_delay = delay;

1990 }	1970 }

OLD	NEW

« no previous file with comments | « no previous file | webrtc/modules/audio_processing/aec/aec_core_mips.c » ('j') | no next file with comments »