OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 547 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
558 InitLevel(&self->nearlevel); | 558 InitLevel(&self->nearlevel); |
559 InitLevel(&self->linoutlevel); | 559 InitLevel(&self->linoutlevel); |
560 InitLevel(&self->nlpoutlevel); | 560 InitLevel(&self->nlpoutlevel); |
561 | 561 |
562 InitStats(&self->erl); | 562 InitStats(&self->erl); |
563 InitStats(&self->erle); | 563 InitStats(&self->erle); |
564 InitStats(&self->aNlp); | 564 InitStats(&self->aNlp); |
565 InitStats(&self->rerl); | 565 InitStats(&self->rerl); |
566 } | 566 } |
567 | 567 |
568 static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { | 568 static float CalculatePower(const float* in, size_t num_samples) { |
569 // Do the energy calculation in the frequency domain. The FFT is performed on | 569 size_t k; |
570 // a segment of PART_LEN2 samples due to overlap, but we only want the energy | 570 float energy = 0.0f; |
571 // of half that data (the last PART_LEN samples). Parseval's relation states | |
572 // that the energy is preserved according to | |
573 // | |
574 // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 | |
575 // = ENERGY, | |
576 // | |
577 // where N = PART_LEN2. Since we are only interested in calculating the energy | |
578 // for the last PART_LEN samples we approximate by calculating ENERGY and | |
579 // divide by 2, | |
580 // | |
581 // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2 | |
582 // | |
583 // Since we deal with real valued time domain signals we only store frequency | |
584 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we | |
585 // need to add the contribution from the missing part in | |
586 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical | |
587 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This | |
588 // is the values in the for loop below, but multiplication by 2 and division | |
589 // by 2 cancel. | |
590 | 571 |
591 // TODO(bjornv): Investigate reusing energy calculations performed at other | 572 for (k = 0; k < num_samples; ++k) { |
592 // places in the code. | 573 energy += in[k] * in[k]; |
593 int k = 1; | 574 } |
594 // Imaginary parts are zero at end points and left out of the calculation. | 575 return energy / num_samples; |
peah-webrtc
2015/12/22 10:54:32
I'm a bit concerned with this computation, while i
minyue-webrtc
2015/12/22 11:20:01
Your finding is very true, and that is the reason,
peah-webrtc
2016/01/08 13:12:59
I totally agree that it is better to use the power
| |
595 float energy = (in[0][0] * in[0][0]) / 2; | 576 } |
596 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2; | |
597 | 577 |
598 for (k = 1; k < PART_LEN; k++) { | 578 static void UpdateLevel(PowerLevel* level, float energy) { |
599 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]); | |
600 } | |
601 energy /= PART_LEN2; | |
602 | |
603 level->sfrsum += energy; | 579 level->sfrsum += energy; |
604 level->sfrcounter++; | 580 level->sfrcounter++; |
605 | 581 |
606 if (level->sfrcounter > subCountLen) { | 582 if (level->sfrcounter > subCountLen) { |
607 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); | 583 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); |
608 level->sfrsum = 0; | 584 level->sfrsum = 0; |
609 level->sfrcounter = 0; | 585 level->sfrcounter = 0; |
610 if (level->framelevel > 0) { | 586 if (level->framelevel > 0) { |
611 if (level->framelevel < level->minlevel) { | 587 if (level->framelevel < level->minlevel) { |
612 level->minlevel = level->framelevel; // New minimum. | 588 level->minlevel = level->framelevel; // New minimum. |
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
839 freq_data[1][0] = 0; | 815 freq_data[1][0] = 0; |
840 freq_data[1][PART_LEN] = 0; | 816 freq_data[1][PART_LEN] = 0; |
841 freq_data[0][0] = time_data[0]; | 817 freq_data[0][0] = time_data[0]; |
842 freq_data[0][PART_LEN] = time_data[1]; | 818 freq_data[0][PART_LEN] = time_data[1]; |
843 for (i = 1; i < PART_LEN; i++) { | 819 for (i = 1; i < PART_LEN; i++) { |
844 freq_data[0][i] = time_data[2 * i]; | 820 freq_data[0][i] = time_data[2 * i]; |
845 freq_data[1][i] = time_data[2 * i + 1]; | 821 freq_data[1][i] = time_data[2 * i + 1]; |
846 } | 822 } |
847 } | 823 } |
848 | 824 |
849 | |
850 static int SignalBasedDelayCorrection(AecCore* self) { | 825 static int SignalBasedDelayCorrection(AecCore* self) { |
851 int delay_correction = 0; | 826 int delay_correction = 0; |
852 int last_delay = -2; | 827 int last_delay = -2; |
853 assert(self != NULL); | 828 assert(self != NULL); |
854 #if !defined(WEBRTC_ANDROID) | 829 #if !defined(WEBRTC_ANDROID) |
855 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This | 830 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This |
856 // is to let the delay estimation get a chance to converge. Also, if the | 831 // is to let the delay estimation get a chance to converge. Also, if the |
857 // playout audio volume is low (or even muted) the delay estimation can return | 832 // playout audio volume is low (or even muted) the delay estimation can return |
858 // a very large delay, which will break the AEC if it is applied. | 833 // a very large delay, which will break the AEC if it is applied. |
859 if (self->frame_count < kDelayCorrectionStart) { | 834 if (self->frame_count < kDelayCorrectionStart) { |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
972 Fft(e_extended, e_fft); | 947 Fft(e_extended, e_fft); |
973 | 948 |
974 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, | 949 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, |
975 &e_fft[0][0], | 950 &e_fft[0][0], |
976 sizeof(e_fft[0][0]) * PART_LEN1 * 2); | 951 sizeof(e_fft[0][0]) * PART_LEN1 * 2); |
977 | 952 |
978 if (metrics_mode == 1) { | 953 if (metrics_mode == 1) { |
979 // Note that the first PART_LEN samples in fft (before transformation) are | 954 // Note that the first PART_LEN samples in fft (before transformation) are |
980 // zero. Hence, the scaling by two in UpdateLevel() should not be | 955 // zero. Hence, the scaling by two in UpdateLevel() should not be |
981 // performed. That scaling is taken care of in UpdateMetrics() instead. | 956 // performed. That scaling is taken care of in UpdateMetrics() instead. |
982 UpdateLevel(linout_level, e_fft); | 957 UpdateLevel(linout_level, CalculatePower(e, PART_LEN) / 2.0f); |
983 } | 958 } |
984 | 959 |
985 // Scale error signal inversely with far power. | 960 // Scale error signal inversely with far power. |
986 WebRtcAec_ScaleErrorSignal(extended_filter_enabled, | 961 WebRtcAec_ScaleErrorSignal(extended_filter_enabled, |
987 normal_mu, | 962 normal_mu, |
988 normal_error_threshold, | 963 normal_error_threshold, |
989 x_pow, | 964 x_pow, |
990 e_fft); | 965 e_fft); |
991 WebRtcAec_FilterAdaptation(num_partitions, | 966 WebRtcAec_FilterAdaptation(num_partitions, |
992 x_fft_buf_block_pos, | 967 x_fft_buf_block_pos, |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1164 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; | 1139 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; |
1165 } else { | 1140 } else { |
1166 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; | 1141 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; |
1167 } | 1142 } |
1168 | 1143 |
1169 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); | 1144 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); |
1170 | 1145 |
1171 // Add comfort noise. | 1146 // Add comfort noise. |
1172 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); | 1147 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); |
1173 | 1148 |
1149 // Inverse error fft. | |
1150 ScaledInverseFft(efw, fft, 2.0f, 1); | |
1151 | |
1174 // TODO(bjornv): Investigate how to take the windowing below into account if | 1152 // TODO(bjornv): Investigate how to take the windowing below into account if |
1175 // needed. | 1153 // needed. |
1176 if (aec->metricsMode == 1) { | 1154 if (aec->metricsMode == 1) { |
1177 // Note that we have a scaling by two in the time domain |eBuf|. | 1155 // Note that we have a scaling by two in the time domain |eBuf|. |
1178 // In addition the time domain signal is windowed before transformation, | 1156 // In addition the time domain signal is windowed before transformation, |
1179 // losing half the energy on the average. We take care of the first | 1157 // losing half the energy on the average. We take care of the first |
1180 // scaling only in UpdateMetrics(). | 1158 // scaling only in UpdateMetrics(). |
1181 UpdateLevel(&aec->nlpoutlevel, efw); | 1159 UpdateLevel(&aec->nlpoutlevel, CalculatePower(fft, PART_LEN2)); |
1182 } | 1160 } |
1183 | 1161 |
1184 // Inverse error fft. | |
1185 ScaledInverseFft(efw, fft, 2.0f, 1); | |
1186 | |
1187 // Overlap and add to obtain output. | 1162 // Overlap and add to obtain output. |
1188 for (i = 0; i < PART_LEN; i++) { | 1163 for (i = 0; i < PART_LEN; i++) { |
1189 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + | 1164 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + |
1190 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); | 1165 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); |
1191 | 1166 |
1192 // Saturate output to keep it in the allowed range. | 1167 // Saturate output to keep it in the allowed range. |
1193 output[i] = WEBRTC_SPL_SAT( | 1168 output[i] = WEBRTC_SPL_SAT( |
1194 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); | 1169 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); |
1195 } | 1170 } |
1196 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); | 1171 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1297 | 1272 |
1298 RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); | 1273 RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); |
1299 } | 1274 } |
1300 #endif | 1275 #endif |
1301 | 1276 |
1302 // Convert far-end signal to the frequency domain. | 1277 // Convert far-end signal to the frequency domain. |
1303 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); | 1278 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); |
1304 Fft(fft, xf); | 1279 Fft(fft, xf); |
1305 xf_ptr = &xf[0][0]; | 1280 xf_ptr = &xf[0][0]; |
1306 | 1281 |
1282 if (aec->metricsMode == 1) { | |
1283 // Update power levels | |
1284 UpdateLevel(&aec->farlevel, CalculatePower(farend_ptr, PART_LEN2)); | |
1285 } | |
1286 | |
1307 // Near fft | 1287 // Near fft |
1308 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); | 1288 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); |
1309 Fft(fft, df); | 1289 Fft(fft, df); |
1310 | 1290 |
1291 if (aec->metricsMode == 1) { | |
peah-webrtc
2015/12/22 10:54:32
I think it is better to bundle the UpdateLevel cal
peah-webrtc
2016/01/08 13:12:59
PTAL
minyue-webrtc
2016/01/14 16:53:51
sure, will do.
| |
1292 UpdateLevel(&aec->nearlevel, CalculatePower(aec->dBuf, PART_LEN2)); | |
1293 } | |
1294 | |
1311 // Power smoothing | 1295 // Power smoothing |
1312 for (i = 0; i < PART_LEN1; i++) { | 1296 for (i = 0; i < PART_LEN1; i++) { |
1313 far_spectrum = (xf_ptr[i] * xf_ptr[i]) + | 1297 far_spectrum = (xf_ptr[i] * xf_ptr[i]) + |
1314 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); | 1298 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); |
1315 aec->xPow[i] = | 1299 aec->xPow[i] = |
1316 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; | 1300 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; |
1317 // Calculate absolute spectra | 1301 // Calculate absolute spectra |
1318 abs_far_spectrum[i] = sqrtf(far_spectrum); | 1302 abs_far_spectrum[i] = sqrtf(far_spectrum); |
1319 | 1303 |
1320 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; | 1304 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1398 aec->wfBuf, | 1382 aec->wfBuf, |
1399 &aec->linoutlevel, | 1383 &aec->linoutlevel, |
1400 echo_subtractor_output); | 1384 echo_subtractor_output); |
1401 | 1385 |
1402 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); | 1386 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); |
1403 | 1387 |
1404 // Perform echo suppression. | 1388 // Perform echo suppression. |
1405 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); | 1389 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); |
1406 | 1390 |
1407 if (aec->metricsMode == 1) { | 1391 if (aec->metricsMode == 1) { |
1408 // Update power levels and echo metrics | |
1409 UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); | |
1410 UpdateLevel(&aec->nearlevel, df); | |
1411 UpdateMetrics(aec); | 1392 UpdateMetrics(aec); |
1412 } | 1393 } |
1413 | 1394 |
1414 // Store the output block. | 1395 // Store the output block. |
1415 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); | 1396 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); |
1416 // For high bands | 1397 // For high bands |
1417 for (i = 0; i < aec->num_bands - 1; ++i) { | 1398 for (i = 0; i < aec->num_bands - 1; ++i) { |
1418 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); | 1399 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); |
1419 } | 1400 } |
1420 | 1401 |
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1941 int WebRtcAec_extended_filter_enabled(AecCore* self) { | 1922 int WebRtcAec_extended_filter_enabled(AecCore* self) { |
1942 return self->extended_filter_enabled; | 1923 return self->extended_filter_enabled; |
1943 } | 1924 } |
1944 | 1925 |
1945 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } | 1926 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } |
1946 | 1927 |
1947 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { | 1928 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { |
1948 assert(delay >= 0); | 1929 assert(delay >= 0); |
1949 self->system_delay = delay; | 1930 self->system_delay = delay; |
1950 } | 1931 } |
OLD | NEW |