OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 26 matching lines...) Expand all Loading... | |
37 | 37 |
38 // Buffer size (partitions) | 38 // Buffer size (partitions) |
39 static const size_t kBufSizePartitions = 250; // 1 second of audio at 16 kHz. | 39 static const size_t kBufSizePartitions = 250; // 1 second of audio at 16 kHz. |
40 | 40 |
41 // Metrics | 41 // Metrics |
42 static const int subCountLen = 4; | 42 static const int subCountLen = 4; |
43 static const int countLen = 50; | 43 static const int countLen = 50; |
44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. | 44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. |
45 | 45 |
46 // Quantities to control H band scaling for SWB input | 46 // Quantities to control H band scaling for SWB input |
47 static const int flagHbandCn = 1; // flag for adding comfort noise in H band | |
minyue-webrtc
2015/12/04 09:10:10
I am not sure if this should be removed
peah-webrtc
2015/12/04 09:54:39
I thought first that this broke the code if not be
minyue-webrtc
2015/12/04 10:05:19
I will let you decide.
peah-webrtc
2015/12/04 22:52:19
Acknowledged.
| |
48 static const float cnScaleHband = | 47 static const float cnScaleHband = |
49 (float)0.4; // scale for comfort noise in H band | 48 (float)0.4; // scale for comfort noise in H band |
50 // Initial bin for averaging nlp gain in low band | 49 // Initial bin for averaging nlp gain in low band |
51 static const int freqAvgIc = PART_LEN / 2; | 50 static const int freqAvgIc = PART_LEN / 2; |
52 | 51 |
53 // Matlab code to produce table: | 52 // Matlab code to produce table: |
54 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; | 53 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; |
55 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); | 54 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); |
56 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { | 55 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { |
57 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, | 56 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, |
(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
476 // tmp = 1 - lambda[i]; | 475 // tmp = 1 - lambda[i]; |
477 efw[0][i] += tmp * u[0][i]; | 476 efw[0][i] += tmp * u[0][i]; |
478 efw[1][i] += tmp * u[1][i]; | 477 efw[1][i] += tmp * u[1][i]; |
479 } | 478 } |
480 | 479 |
481 // For H band comfort noise | 480 // For H band comfort noise |
482 // TODO: don't compute noise and "tmp" twice. Use the previous results. | 481 // TODO: don't compute noise and "tmp" twice. Use the previous results. |
483 noiseAvg = 0.0; | 482 noiseAvg = 0.0; |
484 tmpAvg = 0.0; | 483 tmpAvg = 0.0; |
485 num = 0; | 484 num = 0; |
486 if (aec->num_bands > 1 && flagHbandCn == 1) { | 485 if (aec->num_bands > 1) { |
487 | 486 |
488 // average noise scale | 487 // average noise scale |
489 // average over second half of freq spectrum (i.e., 4->8khz) | 488 // average over second half of freq spectrum (i.e., 4->8khz) |
490 // TODO: we shouldn't need num. We know how many elements we're summing. | 489 // TODO: we shouldn't need num. We know how many elements we're summing. |
491 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { | 490 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { |
492 num++; | 491 num++; |
493 noiseAvg += sqrtf(noisePow[i]); | 492 noiseAvg += sqrtf(noisePow[i]); |
494 } | 493 } |
495 noiseAvg /= (float)num; | 494 noiseAvg /= (float)num; |
496 | 495 |
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
808 self->num_delay_values; | 807 self->num_delay_values; |
809 } | 808 } |
810 | 809 |
811 // Reset histogram. | 810 // Reset histogram. |
812 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); | 811 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); |
813 self->num_delay_values = 0; | 812 self->num_delay_values = 0; |
814 | 813 |
815 return; | 814 return; |
816 } | 815 } |
817 | 816 |
818 static void InverseFft(float freq_data[2][PART_LEN1], | 817 static void InverseFft(float freq_data[2][PART_LEN1], |
minyue-webrtc
2015/12/04 09:10:10
To me, inverseFft is a well defined term (although
peah-webrtc
2015/12/04 09:54:39
Good point! Could we rename it such that it become
minyue-webrtc
2015/12/04 10:05:19
I think we may keep InverseFft() as it was (unless
hlundin-webrtc
2015/12/04 11:48:06
The point with adding scale to this function is th
peah-webrtc
2015/12/04 22:52:19
Yes, that is correct. The scaling differs upon the
peah-webrtc
2015/12/04 22:52:20
I don't think that is a good solution as we would
minyue-webrtc
2015/12/08 12:36:23
You probably still need to, at least, rename the f
| |
819 float time_data[PART_LEN2]) { | 818 float time_data[PART_LEN2], |
819 float scale, | |
820 int conjugate) { | |
820 int i; | 821 int i; |
821 const float scale = 1.0f / PART_LEN2; | 822 const float sign = (conjugate ? -1 : 1); |
822 time_data[0] = freq_data[0][0] * scale; | 823 time_data[0] = freq_data[0][0] * scale; |
823 time_data[1] = freq_data[0][PART_LEN] * scale; | 824 time_data[1] = freq_data[0][PART_LEN] * scale; |
824 for (i = 1; i < PART_LEN; i++) { | 825 for (i = 1; i < PART_LEN; i++) { |
825 time_data[2 * i] = freq_data[0][i] * scale; | 826 time_data[2 * i] = freq_data[0][i] * scale; |
826 time_data[2 * i + 1] = freq_data[1][i] * scale; | 827 time_data[2 * i + 1] = sign * freq_data[1][i] * scale; |
827 } | 828 } |
828 aec_rdft_inverse_128(time_data); | 829 aec_rdft_inverse_128(time_data); |
829 } | 830 } |
830 | 831 |
831 | 832 |
832 static void Fft(float time_data[PART_LEN2], | 833 static void Fft(float time_data[PART_LEN2], |
833 float freq_data[2][PART_LEN1]) { | 834 float freq_data[2][PART_LEN1]) { |
834 int i; | 835 int i; |
835 aec_rdft_forward_128(time_data); | 836 aec_rdft_forward_128(time_data); |
836 | 837 |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
958 | 959 |
959 | 960 |
960 // Produce echo estimate s_fft. | 961 // Produce echo estimate s_fft. |
961 WebRtcAec_FilterFar(num_partitions, | 962 WebRtcAec_FilterFar(num_partitions, |
962 x_fft_buf_block_pos, | 963 x_fft_buf_block_pos, |
963 x_fft_buf, | 964 x_fft_buf, |
964 h_fft_buf, | 965 h_fft_buf, |
965 s_fft); | 966 s_fft); |
966 | 967 |
967 // Compute the time-domain echo estimate s. | 968 // Compute the time-domain echo estimate s. |
968 InverseFft(s_fft, s_extended); | 969 InverseFft(s_fft, s_extended, 2.0f / PART_LEN2, 0); |
969 s = &s_extended[PART_LEN]; | 970 s = &s_extended[PART_LEN]; |
970 for (i = 0; i < PART_LEN; ++i) { | |
971 s[i] *= 2.0f; | |
972 } | |
973 | 971 |
974 // Compute the time-domain echo prediction error. | 972 // Compute the time-domain echo prediction error. |
975 for (i = 0; i < PART_LEN; ++i) { | 973 for (i = 0; i < PART_LEN; ++i) { |
976 e[i] = y[i] - s[i]; | 974 e[i] = y[i] - s[i]; |
977 } | 975 } |
978 | 976 |
979 // Compute the frequency domain echo prediction error. | 977 // Compute the frequency domain echo prediction error. |
980 memset(e_extended, 0, sizeof(float) * PART_LEN); | 978 memset(e_extended, 0, sizeof(float) * PART_LEN); |
981 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); | 979 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); |
982 Fft(e_extended, e_fft); | 980 Fft(e_extended, e_fft); |
(...skipping 26 matching lines...) Expand all Loading... | |
1009 | 1007 |
1010 static void EchoSuppression(AecCore* aec, | 1008 static void EchoSuppression(AecCore* aec, |
1011 float* echo_subtractor_output, | 1009 float* echo_subtractor_output, |
1012 float* output, | 1010 float* output, |
1013 float* const* outputH) { | 1011 float* const* outputH) { |
1014 float efw[2][PART_LEN1]; | 1012 float efw[2][PART_LEN1]; |
1015 float xfw[2][PART_LEN1]; | 1013 float xfw[2][PART_LEN1]; |
1016 float dfw[2][PART_LEN1]; | 1014 float dfw[2][PART_LEN1]; |
1017 float comfortNoiseHband[2][PART_LEN1]; | 1015 float comfortNoiseHband[2][PART_LEN1]; |
1018 float fft[PART_LEN2]; | 1016 float fft[PART_LEN2]; |
1019 float scale, dtmp; | |
1020 float nlpGainHband; | 1017 float nlpGainHband; |
1021 int i; | 1018 int i; |
1022 size_t j; | 1019 size_t j; |
1023 | 1020 |
1024 // Coherence and non-linear filter | 1021 // Coherence and non-linear filter |
1025 float cohde[PART_LEN1], cohxd[PART_LEN1]; | 1022 float cohde[PART_LEN1], cohxd[PART_LEN1]; |
1026 float hNlDeAvg, hNlXdAvg; | 1023 float hNlDeAvg, hNlXdAvg; |
1027 float hNl[PART_LEN1]; | 1024 float hNl[PART_LEN1]; |
1028 float hNlPref[kPrefBandSize]; | 1025 float hNlPref[kPrefBandSize]; |
1029 float hNlFb = 0, hNlFbLow = 0; | 1026 float hNlFb = 0, hNlFbLow = 0; |
(...skipping 18 matching lines...) Expand all Loading... | |
1048 // Analysis filter banks for the echo suppressor. | 1045 // Analysis filter banks for the echo suppressor. |
1049 // Windowed near-end ffts. | 1046 // Windowed near-end ffts. |
1050 WindowData(fft, aec->dBuf); | 1047 WindowData(fft, aec->dBuf); |
1051 aec_rdft_forward_128(fft); | 1048 aec_rdft_forward_128(fft); |
1052 WebRtcAec_StoreAsComplex(fft, dfw); | 1049 WebRtcAec_StoreAsComplex(fft, dfw); |
1053 | 1050 |
1054 // Windowed echo suppressor output ffts. | 1051 // Windowed echo suppressor output ffts. |
1055 WindowData(fft, aec->eBuf); | 1052 WindowData(fft, aec->eBuf); |
1056 aec_rdft_forward_128(fft); | 1053 aec_rdft_forward_128(fft); |
1057 StoreAsComplex(fft, efw); | 1054 StoreAsComplex(fft, efw); |
1058 | 1055 |
hlundin-webrtc
2015/12/04 11:48:06
Remove extra line.
peah-webrtc
2015/12/04 22:52:19
Done.
| |
1059 aec->delayEstCtr++; | |
1060 if (aec->delayEstCtr == delayEstInterval) { | |
1061 aec->delayEstCtr = 0; | |
1062 } | |
1063 | 1056 |
1064 // We should always have at least one element stored in |far_buf|. | 1057 // We should always have at least one element stored in |far_buf|. |
1065 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); | 1058 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); |
1066 // NLP | 1059 // NLP |
1067 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); | 1060 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); |
1068 | 1061 |
1069 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of | 1062 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of |
1070 // |xfwBuf|. | 1063 // |xfwBuf|. |
1071 // Buffer far. | 1064 // Buffer far. |
1072 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); | 1065 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); |
1073 | 1066 |
1074 if (aec->delayEstCtr == 0) | 1067 aec->delayEstCtr++; |
1068 if (aec->delayEstCtr == delayEstInterval) { | |
1069 aec->delayEstCtr = 0; | |
1075 aec->delayIdx = WebRtcAec_PartitionDelay(aec); | 1070 aec->delayIdx = WebRtcAec_PartitionDelay(aec); |
1071 } | |
1072 | |
hlundin-webrtc
2015/12/04 11:48:06
Remove extra line.
peah-webrtc
2015/12/04 22:52:19
Done.
| |
1076 | 1073 |
1077 // Use delayed far. | 1074 // Use delayed far. |
1078 memcpy(xfw, | 1075 memcpy(xfw, |
1079 aec->xfwBuf + aec->delayIdx * PART_LEN1, | 1076 aec->xfwBuf + aec->delayIdx * PART_LEN1, |
1080 sizeof(xfw[0][0]) * 2 * PART_LEN1); | 1077 sizeof(xfw[0][0]) * 2 * PART_LEN1); |
1081 | 1078 |
1082 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, | 1079 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, |
1083 &aec->esup_detected_extreme_filter_divergence); | 1080 &aec->esup_detected_extreme_filter_divergence); |
1084 | 1081 |
1085 | 1082 |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1187 | 1184 |
1188 // TODO(bjornv): Investigate how to take the windowing below into account if | 1185 // TODO(bjornv): Investigate how to take the windowing below into account if |
1189 // needed. | 1186 // needed. |
1190 if (aec->metricsMode == 1) { | 1187 if (aec->metricsMode == 1) { |
1191 // Note that we have a scaling by two in the time domain |eBuf|. | 1188 // Note that we have a scaling by two in the time domain |eBuf|. |
1192 // In addition the time domain signal is windowed before transformation, | 1189 // In addition the time domain signal is windowed before transformation, |
1193 // losing half the energy on the average. We take care of the first | 1190 // losing half the energy on the average. We take care of the first |
1194 // scaling only in UpdateMetrics(). | 1191 // scaling only in UpdateMetrics(). |
1195 UpdateLevel(&aec->nlpoutlevel, efw); | 1192 UpdateLevel(&aec->nlpoutlevel, efw); |
1196 } | 1193 } |
1194 | |
1197 // Inverse error fft. | 1195 // Inverse error fft. |
1198 fft[0] = efw[0][0]; | 1196 InverseFft(efw, fft, 2.0f / PART_LEN2, 1); |
minyue-webrtc
2015/12/04 09:10:10
you can still use InverseFft of old version and ad
peah-webrtc
2015/12/04 09:54:39
The problem with that, is that I'll then need to d
minyue-webrtc
2015/12/04 10:05:19
SGTM
peah-webrtc
2015/12/04 22:52:20
Acknowledged.
| |
1199 fft[1] = efw[0][PART_LEN]; | |
1200 for (i = 1; i < PART_LEN; i++) { | |
1201 fft[2 * i] = efw[0][i]; | |
1202 // Sign change required by Ooura fft. | |
1203 fft[2 * i + 1] = -efw[1][i]; | |
1204 } | |
1205 aec_rdft_inverse_128(fft); | |
1206 | 1197 |
1207 // Overlap and add to obtain output. | 1198 // Overlap and add to obtain output. |
1208 scale = 2.0f / PART_LEN2; | |
1209 for (i = 0; i < PART_LEN; i++) { | 1199 for (i = 0; i < PART_LEN; i++) { |
1210 fft[i] *= scale; // fft scaling | 1200 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + |
1211 fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; | 1201 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); |
1212 | |
1213 fft[PART_LEN + i] *= scale; // fft scaling | |
1214 aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; | |
1215 | 1202 |
1216 // Saturate output to keep it in the allowed range. | 1203 // Saturate output to keep it in the allowed range. |
1217 output[i] = WEBRTC_SPL_SAT( | 1204 output[i] = WEBRTC_SPL_SAT( |
1218 WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); | 1205 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); |
1219 } | 1206 } |
1207 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); | |
1220 | 1208 |
1221 // For H band | 1209 // For H band |
1222 if (aec->num_bands > 1) { | 1210 if (aec->num_bands > 1) { |
1223 | |
1224 // H band gain | 1211 // H band gain |
1225 // average nlp over low band: average over second half of freq spectrum | 1212 // average nlp over low band: average over second half of freq spectrum |
1226 // (4->8khz) | 1213 // (4->8khz) |
1227 GetHighbandGain(hNl, &nlpGainHband); | 1214 GetHighbandGain(hNl, &nlpGainHband); |
1228 | 1215 |
1229 // Inverse comfort_noise | 1216 // Inverse comfort_noise |
1230 if (flagHbandCn == 1) { | 1217 InverseFft(comfortNoiseHband, fft, 2.0f / PART_LEN2, 0); |
1231 fft[0] = comfortNoiseHband[0][0]; | |
1232 fft[1] = comfortNoiseHband[0][PART_LEN]; | |
1233 for (i = 1; i < PART_LEN; i++) { | |
1234 fft[2 * i] = comfortNoiseHband[0][i]; | |
1235 fft[2 * i + 1] = comfortNoiseHband[1][i]; | |
1236 } | |
1237 aec_rdft_inverse_128(fft); | |
1238 scale = 2.0f / PART_LEN2; | |
1239 } | |
1240 | 1218 |
1241 // compute gain factor | 1219 // compute gain factor |
1242 for (j = 0; j < aec->num_bands - 1; ++j) { | 1220 for (j = 0; j < aec->num_bands - 1; ++j) { |
1243 for (i = 0; i < PART_LEN; i++) { | 1221 for (i = 0; i < PART_LEN; i++) { |
1244 dtmp = aec->dBufH[j][i]; | 1222 outputH[j][i] = aec->dBufH[j][i] * nlpGainHband; |
1245 dtmp = dtmp * nlpGainHband; // for variable gain | |
1246 | |
1247 // add some comfort noise where Hband is attenuated | |
1248 if (flagHbandCn == 1 && j == 0) { | |
1249 fft[i] *= scale; // fft scaling | |
1250 dtmp += cnScaleHband * fft[i]; | |
1251 } | |
1252 | |
1253 // Saturate output to keep it in the allowed range. | |
1254 outputH[j][i] = WEBRTC_SPL_SAT( | |
1255 WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); | |
1256 } | 1223 } |
1257 } | 1224 } |
1225 | |
1226 // Add some comfort noise where Hband is attenuated. | |
1227 for (i = 0; i < PART_LEN; i++) { | |
1228 outputH[0][i] += cnScaleHband * fft[i]; | |
1229 } | |
1230 | |
1231 // Saturate output to keep it in the allowed range. | |
1232 for (j = 0; j < aec->num_bands - 1; ++j) { | |
1233 for (i = 0; i < PART_LEN; i++) { | |
1234 outputH[j][i] = WEBRTC_SPL_SAT( | |
1235 WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN); | |
1236 } | |
1237 } | |
1238 | |
1258 } | 1239 } |
1259 | 1240 |
1260 // Copy the current block to the old position. | 1241 // Copy the current block to the old position. |
1261 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); | 1242 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); |
1262 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); | 1243 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); |
1263 | 1244 |
1264 // Copy the current block to the old position for H band | 1245 // Copy the current block to the old position for H band |
1265 for (j = 0; j < aec->num_bands - 1; ++j) { | 1246 for (j = 0; j < aec->num_bands - 1; ++j) { |
1266 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); | 1247 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); |
1267 } | 1248 } |
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1985 int WebRtcAec_extended_filter_enabled(AecCore* self) { | 1966 int WebRtcAec_extended_filter_enabled(AecCore* self) { |
1986 return self->extended_filter_enabled; | 1967 return self->extended_filter_enabled; |
1987 } | 1968 } |
1988 | 1969 |
1989 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } | 1970 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } |
1990 | 1971 |
1991 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { | 1972 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { |
1992 assert(delay >= 0); | 1973 assert(delay >= 0); |
1993 self->system_delay = delay; | 1974 self->system_delay = delay; |
1994 } | 1975 } |
OLD | NEW |