OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 26 matching lines...) Expand all Loading... |
37 | 37 |
38 // Buffer size (samples) | 38 // Buffer size (samples) |
39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. | 39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. |
40 | 40 |
41 // Metrics | 41 // Metrics |
42 static const int subCountLen = 4; | 42 static const int subCountLen = 4; |
43 static const int countLen = 50; | 43 static const int countLen = 50; |
44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. | 44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. |
45 | 45 |
46 // Quantities to control H band scaling for SWB input | 46 // Quantities to control H band scaling for SWB input |
47 static const int flagHbandCn = 1; // flag for adding comfort noise in H band | |
48 static const float cnScaleHband = | 47 static const float cnScaleHband = |
49 (float)0.4; // scale for comfort noise in H band | 48 (float)0.4; // scale for comfort noise in H band |
50 // Initial bin for averaging nlp gain in low band | 49 // Initial bin for averaging nlp gain in low band |
51 static const int freqAvgIc = PART_LEN / 2; | 50 static const int freqAvgIc = PART_LEN / 2; |
52 | 51 |
53 // Matlab code to produce table: | 52 // Matlab code to produce table: |
54 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; | 53 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; |
55 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); | 54 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); |
56 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { | 55 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { |
57 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, | 56 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, |
(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
476 // tmp = 1 - lambda[i]; | 475 // tmp = 1 - lambda[i]; |
477 efw[0][i] += tmp * u[0][i]; | 476 efw[0][i] += tmp * u[0][i]; |
478 efw[1][i] += tmp * u[1][i]; | 477 efw[1][i] += tmp * u[1][i]; |
479 } | 478 } |
480 | 479 |
481 // For H band comfort noise | 480 // For H band comfort noise |
482 // TODO: don't compute noise and "tmp" twice. Use the previous results. | 481 // TODO: don't compute noise and "tmp" twice. Use the previous results. |
483 noiseAvg = 0.0; | 482 noiseAvg = 0.0; |
484 tmpAvg = 0.0; | 483 tmpAvg = 0.0; |
485 num = 0; | 484 num = 0; |
486 if (aec->num_bands > 1 && flagHbandCn == 1) { | 485 if (aec->num_bands > 1) { |
487 | 486 |
488 // average noise scale | 487 // average noise scale |
489 // average over second half of freq spectrum (i.e., 4->8khz) | 488 // average over second half of freq spectrum (i.e., 4->8khz) |
490 // TODO: we shouldn't need num. We know how many elements we're summing. | 489 // TODO: we shouldn't need num. We know how many elements we're summing. |
491 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { | 490 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { |
492 num++; | 491 num++; |
493 noiseAvg += sqrtf(noisePow[i]); | 492 noiseAvg += sqrtf(noisePow[i]); |
494 } | 493 } |
495 noiseAvg /= (float)num; | 494 noiseAvg /= (float)num; |
496 | 495 |
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
807 self->num_delay_values; | 806 self->num_delay_values; |
808 } | 807 } |
809 | 808 |
810 // Reset histogram. | 809 // Reset histogram. |
811 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); | 810 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); |
812 self->num_delay_values = 0; | 811 self->num_delay_values = 0; |
813 | 812 |
814 return; | 813 return; |
815 } | 814 } |
816 | 815 |
817 static void InverseFft(float freq_data[2][PART_LEN1], | 816 static void ScaledInverseFft(float freq_data[2][PART_LEN1], |
818 float time_data[PART_LEN2]) { | 817 float time_data[PART_LEN2], |
| 818 float scale, |
| 819 int conjugate) { |
819 int i; | 820 int i; |
820 const float scale = 1.0f / PART_LEN2; | 821 const float normalization = scale / ((float)PART_LEN2); |
821 time_data[0] = freq_data[0][0] * scale; | 822 const float sign = (conjugate ? -1 : 1); |
822 time_data[1] = freq_data[0][PART_LEN] * scale; | 823 time_data[0] = freq_data[0][0] * normalization; |
| 824 time_data[1] = freq_data[0][PART_LEN] * normalization; |
823 for (i = 1; i < PART_LEN; i++) { | 825 for (i = 1; i < PART_LEN; i++) { |
824 time_data[2 * i] = freq_data[0][i] * scale; | 826 time_data[2 * i] = freq_data[0][i] * normalization; |
825 time_data[2 * i + 1] = freq_data[1][i] * scale; | 827 time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; |
826 } | 828 } |
827 aec_rdft_inverse_128(time_data); | 829 aec_rdft_inverse_128(time_data); |
828 } | 830 } |
829 | 831 |
830 | 832 |
831 static void Fft(float time_data[PART_LEN2], | 833 static void Fft(float time_data[PART_LEN2], |
832 float freq_data[2][PART_LEN1]) { | 834 float freq_data[2][PART_LEN1]) { |
833 int i; | 835 int i; |
834 aec_rdft_forward_128(time_data); | 836 aec_rdft_forward_128(time_data); |
835 | 837 |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
956 } | 958 } |
957 | 959 |
958 // Produce echo estimate s_fft. | 960 // Produce echo estimate s_fft. |
959 WebRtcAec_FilterFar(num_partitions, | 961 WebRtcAec_FilterFar(num_partitions, |
960 x_fft_buf_block_pos, | 962 x_fft_buf_block_pos, |
961 x_fft_buf, | 963 x_fft_buf, |
962 h_fft_buf, | 964 h_fft_buf, |
963 s_fft); | 965 s_fft); |
964 | 966 |
965 // Compute the time-domain echo estimate s. | 967 // Compute the time-domain echo estimate s. |
966 InverseFft(s_fft, s_extended); | 968 ScaledInverseFft(s_fft, s_extended, 2.0f, 0); |
967 s = &s_extended[PART_LEN]; | 969 s = &s_extended[PART_LEN]; |
968 for (i = 0; i < PART_LEN; ++i) { | |
969 s[i] *= 2.0f; | |
970 } | |
971 | 970 |
972 // Compute the time-domain echo prediction error. | 971 // Compute the time-domain echo prediction error. |
973 for (i = 0; i < PART_LEN; ++i) { | 972 for (i = 0; i < PART_LEN; ++i) { |
974 e[i] = y[i] - s[i]; | 973 e[i] = y[i] - s[i]; |
975 } | 974 } |
976 | 975 |
977 // Compute the frequency domain echo prediction error. | 976 // Compute the frequency domain echo prediction error. |
978 memset(e_extended, 0, sizeof(float) * PART_LEN); | 977 memset(e_extended, 0, sizeof(float) * PART_LEN); |
979 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); | 978 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); |
980 Fft(e_extended, e_fft); | 979 Fft(e_extended, e_fft); |
(...skipping 26 matching lines...) Expand all Loading... |
1007 | 1006 |
1008 static void EchoSuppression(AecCore* aec, | 1007 static void EchoSuppression(AecCore* aec, |
1009 float* echo_subtractor_output, | 1008 float* echo_subtractor_output, |
1010 float* output, | 1009 float* output, |
1011 float* const* outputH) { | 1010 float* const* outputH) { |
1012 float efw[2][PART_LEN1]; | 1011 float efw[2][PART_LEN1]; |
1013 float xfw[2][PART_LEN1]; | 1012 float xfw[2][PART_LEN1]; |
1014 float dfw[2][PART_LEN1]; | 1013 float dfw[2][PART_LEN1]; |
1015 float comfortNoiseHband[2][PART_LEN1]; | 1014 float comfortNoiseHband[2][PART_LEN1]; |
1016 float fft[PART_LEN2]; | 1015 float fft[PART_LEN2]; |
1017 float scale, dtmp; | |
1018 float nlpGainHband; | 1016 float nlpGainHband; |
1019 int i; | 1017 int i; |
1020 size_t j; | 1018 size_t j; |
1021 | 1019 |
1022 // Coherence and non-linear filter | 1020 // Coherence and non-linear filter |
1023 float cohde[PART_LEN1], cohxd[PART_LEN1]; | 1021 float cohde[PART_LEN1], cohxd[PART_LEN1]; |
1024 float hNlDeAvg, hNlXdAvg; | 1022 float hNlDeAvg, hNlXdAvg; |
1025 float hNl[PART_LEN1]; | 1023 float hNl[PART_LEN1]; |
1026 float hNlPref[kPrefBandSize]; | 1024 float hNlPref[kPrefBandSize]; |
1027 float hNlFb = 0, hNlFbLow = 0; | 1025 float hNlFb = 0, hNlFbLow = 0; |
(...skipping 19 matching lines...) Expand all Loading... |
1047 // Windowed near-end ffts. | 1045 // Windowed near-end ffts. |
1048 WindowData(fft, aec->dBuf); | 1046 WindowData(fft, aec->dBuf); |
1049 aec_rdft_forward_128(fft); | 1047 aec_rdft_forward_128(fft); |
1050 StoreAsComplex(fft, dfw); | 1048 StoreAsComplex(fft, dfw); |
1051 | 1049 |
1052 // Windowed echo suppressor output ffts. | 1050 // Windowed echo suppressor output ffts. |
1053 WindowData(fft, aec->eBuf); | 1051 WindowData(fft, aec->eBuf); |
1054 aec_rdft_forward_128(fft); | 1052 aec_rdft_forward_128(fft); |
1055 StoreAsComplex(fft, efw); | 1053 StoreAsComplex(fft, efw); |
1056 | 1054 |
1057 aec->delayEstCtr++; | |
1058 if (aec->delayEstCtr == delayEstInterval) { | |
1059 aec->delayEstCtr = 0; | |
1060 } | |
1061 | |
1062 // We should always have at least one element stored in |far_buf|. | 1055 // We should always have at least one element stored in |far_buf|. |
1063 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); | 1056 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); |
1064 // NLP | 1057 // NLP |
1065 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); | 1058 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); |
1066 | 1059 |
1067 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of | 1060 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of |
1068 // |xfwBuf|. | 1061 // |xfwBuf|. |
1069 // Buffer far. | 1062 // Buffer far. |
1070 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); | 1063 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); |
1071 | 1064 |
1072 if (aec->delayEstCtr == 0) | 1065 aec->delayEstCtr++; |
| 1066 if (aec->delayEstCtr == delayEstInterval) { |
| 1067 aec->delayEstCtr = 0; |
1073 aec->delayIdx = WebRtcAec_PartitionDelay(aec); | 1068 aec->delayIdx = WebRtcAec_PartitionDelay(aec); |
| 1069 } |
1074 | 1070 |
1075 // Use delayed far. | 1071 // Use delayed far. |
1076 memcpy(xfw, | 1072 memcpy(xfw, |
1077 aec->xfwBuf + aec->delayIdx * PART_LEN1, | 1073 aec->xfwBuf + aec->delayIdx * PART_LEN1, |
1078 sizeof(xfw[0][0]) * 2 * PART_LEN1); | 1074 sizeof(xfw[0][0]) * 2 * PART_LEN1); |
1079 | 1075 |
1080 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, | 1076 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, |
1081 &aec->extreme_filter_divergence); | 1077 &aec->extreme_filter_divergence); |
1082 | 1078 |
1083 // Select the microphone signal as output if the filter is deemed to have | 1079 // Select the microphone signal as output if the filter is deemed to have |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1183 | 1179 |
1184 // TODO(bjornv): Investigate how to take the windowing below into account if | 1180 // TODO(bjornv): Investigate how to take the windowing below into account if |
1185 // needed. | 1181 // needed. |
1186 if (aec->metricsMode == 1) { | 1182 if (aec->metricsMode == 1) { |
1187 // Note that we have a scaling by two in the time domain |eBuf|. | 1183 // Note that we have a scaling by two in the time domain |eBuf|. |
1188 // In addition the time domain signal is windowed before transformation, | 1184 // In addition the time domain signal is windowed before transformation, |
1189 // losing half the energy on the average. We take care of the first | 1185 // losing half the energy on the average. We take care of the first |
1190 // scaling only in UpdateMetrics(). | 1186 // scaling only in UpdateMetrics(). |
1191 UpdateLevel(&aec->nlpoutlevel, efw); | 1187 UpdateLevel(&aec->nlpoutlevel, efw); |
1192 } | 1188 } |
| 1189 |
1193 // Inverse error fft. | 1190 // Inverse error fft. |
1194 fft[0] = efw[0][0]; | 1191 ScaledInverseFft(efw, fft, 2.0f, 1); |
1195 fft[1] = efw[0][PART_LEN]; | |
1196 for (i = 1; i < PART_LEN; i++) { | |
1197 fft[2 * i] = efw[0][i]; | |
1198 // Sign change required by Ooura fft. | |
1199 fft[2 * i + 1] = -efw[1][i]; | |
1200 } | |
1201 aec_rdft_inverse_128(fft); | |
1202 | 1192 |
1203 // Overlap and add to obtain output. | 1193 // Overlap and add to obtain output. |
1204 scale = 2.0f / PART_LEN2; | |
1205 for (i = 0; i < PART_LEN; i++) { | 1194 for (i = 0; i < PART_LEN; i++) { |
1206 fft[i] *= scale; // fft scaling | 1195 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + |
1207 fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; | 1196 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); |
1208 | |
1209 fft[PART_LEN + i] *= scale; // fft scaling | |
1210 aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; | |
1211 | 1197 |
1212 // Saturate output to keep it in the allowed range. | 1198 // Saturate output to keep it in the allowed range. |
1213 output[i] = WEBRTC_SPL_SAT( | 1199 output[i] = WEBRTC_SPL_SAT( |
1214 WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); | 1200 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); |
1215 } | 1201 } |
| 1202 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); |
1216 | 1203 |
1217 // For H band | 1204 // For H band |
1218 if (aec->num_bands > 1) { | 1205 if (aec->num_bands > 1) { |
1219 | |
1220 // H band gain | 1206 // H band gain |
1221 // average nlp over low band: average over second half of freq spectrum | 1207 // average nlp over low band: average over second half of freq spectrum |
1222 // (4->8khz) | 1208 // (4->8khz) |
1223 GetHighbandGain(hNl, &nlpGainHband); | 1209 GetHighbandGain(hNl, &nlpGainHband); |
1224 | 1210 |
1225 // Inverse comfort_noise | 1211 // Inverse comfort_noise |
1226 if (flagHbandCn == 1) { | 1212 ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0); |
1227 fft[0] = comfortNoiseHband[0][0]; | |
1228 fft[1] = comfortNoiseHband[0][PART_LEN]; | |
1229 for (i = 1; i < PART_LEN; i++) { | |
1230 fft[2 * i] = comfortNoiseHband[0][i]; | |
1231 fft[2 * i + 1] = comfortNoiseHband[1][i]; | |
1232 } | |
1233 aec_rdft_inverse_128(fft); | |
1234 scale = 2.0f / PART_LEN2; | |
1235 } | |
1236 | 1213 |
1237 // compute gain factor | 1214 // compute gain factor |
1238 for (j = 0; j < aec->num_bands - 1; ++j) { | 1215 for (j = 0; j < aec->num_bands - 1; ++j) { |
1239 for (i = 0; i < PART_LEN; i++) { | 1216 for (i = 0; i < PART_LEN; i++) { |
1240 dtmp = aec->dBufH[j][i]; | 1217 outputH[j][i] = aec->dBufH[j][i] * nlpGainHband; |
1241 dtmp = dtmp * nlpGainHband; // for variable gain | |
1242 | |
1243 // add some comfort noise where Hband is attenuated | |
1244 if (flagHbandCn == 1 && j == 0) { | |
1245 fft[i] *= scale; // fft scaling | |
1246 dtmp += cnScaleHband * fft[i]; | |
1247 } | |
1248 | |
1249 // Saturate output to keep it in the allowed range. | |
1250 outputH[j][i] = WEBRTC_SPL_SAT( | |
1251 WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); | |
1252 } | 1218 } |
1253 } | 1219 } |
| 1220 |
| 1221 // Add some comfort noise where Hband is attenuated. |
| 1222 for (i = 0; i < PART_LEN; i++) { |
| 1223 outputH[0][i] += cnScaleHband * fft[i]; |
| 1224 } |
| 1225 |
| 1226 // Saturate output to keep it in the allowed range. |
| 1227 for (j = 0; j < aec->num_bands - 1; ++j) { |
| 1228 for (i = 0; i < PART_LEN; i++) { |
| 1229 outputH[j][i] = WEBRTC_SPL_SAT( |
| 1230 WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN); |
| 1231 } |
| 1232 } |
| 1233 |
1254 } | 1234 } |
1255 | 1235 |
1256 // Copy the current block to the old position. | 1236 // Copy the current block to the old position. |
1257 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); | 1237 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); |
1258 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); | 1238 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); |
1259 | 1239 |
1260 // Copy the current block to the old position for H band | 1240 // Copy the current block to the old position for H band |
1261 for (j = 0; j < aec->num_bands - 1; ++j) { | 1241 for (j = 0; j < aec->num_bands - 1; ++j) { |
1262 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); | 1242 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); |
1263 } | 1243 } |
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1981 int WebRtcAec_extended_filter_enabled(AecCore* self) { | 1961 int WebRtcAec_extended_filter_enabled(AecCore* self) { |
1982 return self->extended_filter_enabled; | 1962 return self->extended_filter_enabled; |
1983 } | 1963 } |
1984 | 1964 |
1985 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } | 1965 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } |
1986 | 1966 |
1987 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { | 1967 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { |
1988 assert(delay >= 0); | 1968 assert(delay >= 0); |
1989 self->system_delay = delay; | 1969 self->system_delay = delay; |
1990 } | 1970 } |
OLD | NEW |