Chromium Code Reviews

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core.c

Issue 1492343002: Echo Suppressor code simplification using the InverseFft function (#5) (Closed)
Base URL: https://chromium.googlesource.com/external/webrtc.git@ESUP_refactoring4_CL
Patch Set: Changed the scaling of ScaledInverseFft (created 5 years ago)
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 26 matching lines...)
37 37
38 // Buffer size (samples) 38 // Buffer size (samples)
39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. 39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz.
40 40
41 // Metrics 41 // Metrics
42 static const int subCountLen = 4; 42 static const int subCountLen = 4;
43 static const int countLen = 50; 43 static const int countLen = 50;
44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. 44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.
45 45
46 // Quantities to control H band scaling for SWB input 46 // Quantities to control H band scaling for SWB input
47 static const int flagHbandCn = 1; // flag for adding comfort noise in H band
48 static const float cnScaleHband = 47 static const float cnScaleHband =
49 (float)0.4; // scale for comfort noise in H band 48 (float)0.4; // scale for comfort noise in H band
50 // Initial bin for averaging nlp gain in low band 49 // Initial bin for averaging nlp gain in low band
51 static const int freqAvgIc = PART_LEN / 2; 50 static const int freqAvgIc = PART_LEN / 2;
52 51
53 // Matlab code to produce table: 52 // Matlab code to produce table:
54 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; 53 // win = sqrt(hanning(63)); win = [0 ; win(1:32)];
55 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); 54 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
56 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { 55 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {
57 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, 56 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
(...skipping 418 matching lines...)
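Reviewer note (illustration, not part of the patch): the tabled values appear to be sin(pi * i / 128) for i = 0..64, i.e. a square-root Hann window spanning PART_LEN2 = 128 samples, assuming PART_LEN = 64 as used elsewhere in this file. A minimal standalone C sketch that regenerates the values for comparison:

    /* Standalone sketch: regenerate the square-root Hann table for comparison. */
    #include <stdio.h>
    #include <math.h>

    int main(void) {
      const double kPi = 3.14159265358979323846;
      const int kPartLen = 64;  /* assumed value of PART_LEN */
      int i;
      for (i = 0; i <= kPartLen; ++i) {
        /* sin(pi * i / 128) == sqrt(0.5 * (1.0 - cos(2.0 * pi * i / 128))) */
        printf("%.14f,\n", sin(kPi * i / (2.0 * kPartLen)));
      }
      return 0;
    }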
476 // tmp = 1 - lambda[i]; 475 // tmp = 1 - lambda[i];
477 efw[0][i] += tmp * u[0][i]; 476 efw[0][i] += tmp * u[0][i];
478 efw[1][i] += tmp * u[1][i]; 477 efw[1][i] += tmp * u[1][i];
479 } 478 }
480 479
481 // For H band comfort noise 480 // For H band comfort noise
482 // TODO: don't compute noise and "tmp" twice. Use the previous results. 481 // TODO: don't compute noise and "tmp" twice. Use the previous results.
483 noiseAvg = 0.0; 482 noiseAvg = 0.0;
484 tmpAvg = 0.0; 483 tmpAvg = 0.0;
485 num = 0; 484 num = 0;
486 if (aec->num_bands > 1 && flagHbandCn == 1) { 485 if (aec->num_bands > 1) {
487 486
488 // average noise scale 487 // average noise scale
489 // average over second half of freq spectrum (i.e., 4->8khz) 488 // average over second half of freq spectrum (i.e., 4->8khz)
490 // TODO: we shouldn't need num. We know how many elements we're summing. 489 // TODO: we shouldn't need num. We know how many elements we're summing.
491 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 490 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
492 num++; 491 num++;
493 noiseAvg += sqrtf(noisePow[i]); 492 noiseAvg += sqrtf(noisePow[i]);
494 } 493 }
495 noiseAvg /= (float)num; 494 noiseAvg /= (float)num;
496 495
(...skipping 310 matching lines...)
807 self->num_delay_values; 806 self->num_delay_values;
808 } 807 }
809 808
810 // Reset histogram. 809 // Reset histogram.
811 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); 810 memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
812 self->num_delay_values = 0; 811 self->num_delay_values = 0;
813 812
814 return; 813 return;
815 } 814 }
816 815
817 static void InverseFft(float freq_data[2][PART_LEN1], 816 static void ScaledInverseFft(float freq_data[2][PART_LEN1],
818 float time_data[PART_LEN2]) { 817 float time_data[PART_LEN2],
818 float scale,
819 int conjugate) {
819 int i; 820 int i;
820 const float scale = 1.0f / PART_LEN2; 821 const float normalization = scale / ((float)PART_LEN2);
821 time_data[0] = freq_data[0][0] * scale; 822 const float sign = (conjugate ? -1 : 1);
822 time_data[1] = freq_data[0][PART_LEN] * scale; 823 time_data[0] = freq_data[0][0] * normalization;
824 time_data[1] = freq_data[0][PART_LEN] * normalization;
823 for (i = 1; i < PART_LEN; i++) { 825 for (i = 1; i < PART_LEN; i++) {
824 time_data[2 * i] = freq_data[0][i] * scale; 826 time_data[2 * i] = freq_data[0][i] * normalization;
825 time_data[2 * i + 1] = freq_data[1][i] * scale; 827 time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;
826 } 828 }
827 aec_rdft_inverse_128(time_data); 829 aec_rdft_inverse_128(time_data);
828 } 830 }
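Reviewer note on the new helper (a sketch, not part of the patch): the internal normalization is scale / PART_LEN2, so with scale = 2.0f and PART_LEN2 = 128 each sample is multiplied by 1/64, and the old InverseFft(freq_data, time_data) corresponds to ScaledInverseFft(freq_data, time_data, 1.0f, 0). Passing conjugate = 1 negates the imaginary parts, which replaces the "Sign change required by Ooura fft" loop that the old code ran by hand before aec_rdft_inverse_128(). The three call sites touched by this patch then read:

    ScaledInverseFft(s_fft, s_extended, 2.0f, 0);         /* time-domain echo estimate */
    ScaledInverseFft(efw, fft, 2.0f, 1);                  /* error spectrum, conjugated */
    ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);    /* H-band comfort noise */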
829 831
830 832
831 static void Fft(float time_data[PART_LEN2], 833 static void Fft(float time_data[PART_LEN2],
832 float freq_data[2][PART_LEN1]) { 834 float freq_data[2][PART_LEN1]) {
833 int i; 835 int i;
834 aec_rdft_forward_128(time_data); 836 aec_rdft_forward_128(time_data);
835 837
(...skipping 120 matching lines...)
956 } 958 }
957 959
958 // Produce echo estimate s_fft. 960 // Produce echo estimate s_fft.
959 WebRtcAec_FilterFar(num_partitions, 961 WebRtcAec_FilterFar(num_partitions,
960 x_fft_buf_block_pos, 962 x_fft_buf_block_pos,
961 x_fft_buf, 963 x_fft_buf,
962 h_fft_buf, 964 h_fft_buf,
963 s_fft); 965 s_fft);
964 966
965 // Compute the time-domain echo estimate s. 967 // Compute the time-domain echo estimate s.
966 InverseFft(s_fft, s_extended); 968 ScaledInverseFft(s_fft, s_extended, 2.0f, 0);
967 s = &s_extended[PART_LEN]; 969 s = &s_extended[PART_LEN];
968 for (i = 0; i < PART_LEN; ++i) {
969 s[i] *= 2.0f;
970 }
971 970
972 // Compute the time-domain echo prediction error. 971 // Compute the time-domain echo prediction error.
973 for (i = 0; i < PART_LEN; ++i) { 972 for (i = 0; i < PART_LEN; ++i) {
974 e[i] = y[i] - s[i]; 973 e[i] = y[i] - s[i];
975 } 974 }
976 975
977 // Compute the frequency domain echo prediction error. 976 // Compute the frequency domain echo prediction error.
978 memset(e_extended, 0, sizeof(float) * PART_LEN); 977 memset(e_extended, 0, sizeof(float) * PART_LEN);
979 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); 978 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
980 Fft(e_extended, e_fft); 979 Fft(e_extended, e_fft);
(...skipping 26 matching lines...)
1007 1006
1008 static void EchoSuppression(AecCore* aec, 1007 static void EchoSuppression(AecCore* aec,
1009 float* echo_subtractor_output, 1008 float* echo_subtractor_output,
1010 float* output, 1009 float* output,
1011 float* const* outputH) { 1010 float* const* outputH) {
1012 float efw[2][PART_LEN1]; 1011 float efw[2][PART_LEN1];
1013 float xfw[2][PART_LEN1]; 1012 float xfw[2][PART_LEN1];
1014 float dfw[2][PART_LEN1]; 1013 float dfw[2][PART_LEN1];
1015 float comfortNoiseHband[2][PART_LEN1]; 1014 float comfortNoiseHband[2][PART_LEN1];
1016 float fft[PART_LEN2]; 1015 float fft[PART_LEN2];
1017 float scale, dtmp;
1018 float nlpGainHband; 1016 float nlpGainHband;
1019 int i; 1017 int i;
1020 size_t j; 1018 size_t j;
1021 1019
1022 // Coherence and non-linear filter 1020 // Coherence and non-linear filter
1023 float cohde[PART_LEN1], cohxd[PART_LEN1]; 1021 float cohde[PART_LEN1], cohxd[PART_LEN1];
1024 float hNlDeAvg, hNlXdAvg; 1022 float hNlDeAvg, hNlXdAvg;
1025 float hNl[PART_LEN1]; 1023 float hNl[PART_LEN1];
1026 float hNlPref[kPrefBandSize]; 1024 float hNlPref[kPrefBandSize];
1027 float hNlFb = 0, hNlFbLow = 0; 1025 float hNlFb = 0, hNlFbLow = 0;
(...skipping 19 matching lines...)
1047 // Windowed near-end ffts. 1045 // Windowed near-end ffts.
1048 WindowData(fft, aec->dBuf); 1046 WindowData(fft, aec->dBuf);
1049 aec_rdft_forward_128(fft); 1047 aec_rdft_forward_128(fft);
1050 StoreAsComplex(fft, dfw); 1048 StoreAsComplex(fft, dfw);
1051 1049
1052 // Windowed echo suppressor output ffts. 1050 // Windowed echo suppressor output ffts.
1053 WindowData(fft, aec->eBuf); 1051 WindowData(fft, aec->eBuf);
1054 aec_rdft_forward_128(fft); 1052 aec_rdft_forward_128(fft);
1055 StoreAsComplex(fft, efw); 1053 StoreAsComplex(fft, efw);
1056 1054
1057 aec->delayEstCtr++;
1058 if (aec->delayEstCtr == delayEstInterval) {
1059 aec->delayEstCtr = 0;
1060 }
1061
1062 // We should always have at least one element stored in |far_buf|. 1055 // We should always have at least one element stored in |far_buf|.
1063 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); 1056 assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
1064 // NLP 1057 // NLP
1065 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); 1058 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);
1066 1059
1067 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of 1060 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
1068 // |xfwBuf|. 1061 // |xfwBuf|.
1069 // Buffer far. 1062 // Buffer far.
1070 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); 1063 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
1071 1064
1072 if (aec->delayEstCtr == 0) 1065 aec->delayEstCtr++;
1066 if (aec->delayEstCtr == delayEstInterval) {
1067 aec->delayEstCtr = 0;
1073 aec->delayIdx = WebRtcAec_PartitionDelay(aec); 1068 aec->delayIdx = WebRtcAec_PartitionDelay(aec);
1069 }
1074 1070
1075 // Use delayed far. 1071 // Use delayed far.
1076 memcpy(xfw, 1072 memcpy(xfw,
1077 aec->xfwBuf + aec->delayIdx * PART_LEN1, 1073 aec->xfwBuf + aec->delayIdx * PART_LEN1,
1078 sizeof(xfw[0][0]) * 2 * PART_LEN1); 1074 sizeof(xfw[0][0]) * 2 * PART_LEN1);
1079 1075
1080 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, 1076 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,
1081 &aec->extreme_filter_divergence); 1077 &aec->extreme_filter_divergence);
1082 1078
1083 // Select the microphone signal as output if the filter is deemed to have 1079 // Select the microphone signal as output if the filter is deemed to have
(...skipping 99 matching lines...)
1183 1179
1184 // TODO(bjornv): Investigate how to take the windowing below into account if 1180 // TODO(bjornv): Investigate how to take the windowing below into account if
1185 // needed. 1181 // needed.
1186 if (aec->metricsMode == 1) { 1182 if (aec->metricsMode == 1) {
1187 // Note that we have a scaling by two in the time domain |eBuf|. 1183 // Note that we have a scaling by two in the time domain |eBuf|.
1188 // In addition the time domain signal is windowed before transformation, 1184 // In addition the time domain signal is windowed before transformation,
1189 // losing half the energy on the average. We take care of the first 1185 // losing half the energy on the average. We take care of the first
1190 // scaling only in UpdateMetrics(). 1186 // scaling only in UpdateMetrics().
1191 UpdateLevel(&aec->nlpoutlevel, efw); 1187 UpdateLevel(&aec->nlpoutlevel, efw);
1192 } 1188 }
1189
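Reviewer note (not part of the patch): the "losing half the energy on the average" remark follows from the analysis window itself. With w[n] = sin(pi * n / N) and N = 128, the average of w[n]^2 over a block is (1/N) * sum_{n=0..N-1} sin^2(pi * n / N) = 1/2, so a windowed frame carries on average half the energy of the unwindowed frame.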
1193 // Inverse error fft. 1190 // Inverse error fft.
1194 fft[0] = efw[0][0]; 1191 ScaledInverseFft(efw, fft, 2.0f, 1);
1195 fft[1] = efw[0][PART_LEN];
1196 for (i = 1; i < PART_LEN; i++) {
1197 fft[2 * i] = efw[0][i];
1198 // Sign change required by Ooura fft.
1199 fft[2 * i + 1] = -efw[1][i];
1200 }
1201 aec_rdft_inverse_128(fft);
1202 1192
1203 // Overlap and add to obtain output. 1193 // Overlap and add to obtain output.
1204 scale = 2.0f / PART_LEN2;
1205 for (i = 0; i < PART_LEN; i++) { 1194 for (i = 0; i < PART_LEN; i++) {
1206 fft[i] *= scale; // fft scaling 1195 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +
1207 fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; 1196 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);
1208
1209 fft[PART_LEN + i] *= scale; // fft scaling
1210 aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
1211 1197
1212 // Saturate output to keep it in the allowed range. 1198 // Saturate output to keep it in the allowed range.
1213 output[i] = WEBRTC_SPL_SAT( 1199 output[i] = WEBRTC_SPL_SAT(
1214 WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); 1200 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);
1215 } 1201 }
1202 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));
1216 1203
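Reviewer sketch of the restructured overlap-add (hypothetical helper, not part of the patch): the old code stored the already-windowed tail in outBuf, while the new code stores the raw tail and applies WebRtcAec_sqrtHanning[PART_LEN - i] at combine time; both yield output[i] = head[i] * win[i] + previous_tail[i] * win[PART_LEN - i]. The new structure, factored out (saturation omitted):

    /* Hypothetical helper illustrating the new overlap-add structure. */
    #include <string.h>

    #define PART_LEN 64  /* assumed partition length */
    extern const float WebRtcAec_sqrtHanning[PART_LEN + 1];

    static void OverlapAdd(const float time_data[2 * PART_LEN],
                           float out_buf[PART_LEN],  /* raw tail of the previous block */
                           float output[PART_LEN]) {
      int i;
      for (i = 0; i < PART_LEN; i++) {
        /* Window both the current head and the saved tail when combining. */
        output[i] = time_data[i] * WebRtcAec_sqrtHanning[i] +
                    out_buf[i] * WebRtcAec_sqrtHanning[PART_LEN - i];
      }
      /* Save the unwindowed tail; it is windowed when the next block arrives. */
      memcpy(out_buf, time_data + PART_LEN, PART_LEN * sizeof(out_buf[0]));
    }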
1217 // For H band 1204 // For H band
1218 if (aec->num_bands > 1) { 1205 if (aec->num_bands > 1) {
1219
1220 // H band gain 1206 // H band gain
1221 // average nlp over low band: average over second half of freq spectrum 1207 // average nlp over low band: average over second half of freq spectrum
1222 // (4->8khz) 1208 // (4->8khz)
1223 GetHighbandGain(hNl, &nlpGainHband); 1209 GetHighbandGain(hNl, &nlpGainHband);
1224 1210
1225 // Inverse comfort_noise 1211 // Inverse comfort_noise
1226 if (flagHbandCn == 1) { 1212 ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);
1227 fft[0] = comfortNoiseHband[0][0];
1228 fft[1] = comfortNoiseHband[0][PART_LEN];
1229 for (i = 1; i < PART_LEN; i++) {
1230 fft[2 * i] = comfortNoiseHband[0][i];
1231 fft[2 * i + 1] = comfortNoiseHband[1][i];
1232 }
1233 aec_rdft_inverse_128(fft);
1234 scale = 2.0f / PART_LEN2;
1235 }
1236 1213
1237 // compute gain factor 1214 // compute gain factor
1238 for (j = 0; j < aec->num_bands - 1; ++j) { 1215 for (j = 0; j < aec->num_bands - 1; ++j) {
1239 for (i = 0; i < PART_LEN; i++) { 1216 for (i = 0; i < PART_LEN; i++) {
1240 dtmp = aec->dBufH[j][i]; 1217 outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;
1241 dtmp = dtmp * nlpGainHband; // for variable gain
1242
1243 // add some comfort noise where Hband is attenuated
1244 if (flagHbandCn == 1 && j == 0) {
1245 fft[i] *= scale; // fft scaling
1246 dtmp += cnScaleHband * fft[i];
1247 }
1248
1249 // Saturate output to keep it in the allowed range.
1250 outputH[j][i] = WEBRTC_SPL_SAT(
1251 WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
1252 } 1218 }
1253 } 1219 }
1220
1221 // Add some comfort noise where Hband is attenuated.
1222 for (i = 0; i < PART_LEN; i++) {
1223 outputH[0][i] += cnScaleHband * fft[i];
1224 }
1225
1226 // Saturate output to keep it in the allowed range.
1227 for (j = 0; j < aec->num_bands - 1; ++j) {
1228 for (i = 0; i < PART_LEN; i++) {
1229 outputH[j][i] = WEBRTC_SPL_SAT(
1230 WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN);
1231 }
1232 }
1233
1254 } 1234 }
1255 1235
1256 // Copy the current block to the old position. 1236 // Copy the current block to the old position.
1257 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); 1237 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
1258 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); 1238 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
1259 1239
1260 // Copy the current block to the old position for H band 1240 // Copy the current block to the old position for H band
1261 for (j = 0; j < aec->num_bands - 1; ++j) { 1241 for (j = 0; j < aec->num_bands - 1; ++j) {
1262 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); 1242 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
1263 } 1243 }
(...skipping 717 matching lines...)
1981 int WebRtcAec_extended_filter_enabled(AecCore* self) { 1961 int WebRtcAec_extended_filter_enabled(AecCore* self) {
1982 return self->extended_filter_enabled; 1962 return self->extended_filter_enabled;
1983 } 1963 }
1984 1964
1985 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } 1965 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
1986 1966
1987 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { 1967 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
1988 assert(delay >= 0); 1968 assert(delay >= 0);
1989 self->system_delay = delay; 1969 self->system_delay = delay;
1990 } 1970 }