Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(497)

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core.c

Issue 1542573002: Calculate audio levels in AEC in time domain. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: normalizing |noisePower| Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 547 matching lines...) Expand 10 before | Expand all | Expand 10 after
558 InitLevel(&self->nearlevel); 558 InitLevel(&self->nearlevel);
559 InitLevel(&self->linoutlevel); 559 InitLevel(&self->linoutlevel);
560 InitLevel(&self->nlpoutlevel); 560 InitLevel(&self->nlpoutlevel);
561 561
562 InitStats(&self->erl); 562 InitStats(&self->erl);
563 InitStats(&self->erle); 563 InitStats(&self->erle);
564 InitStats(&self->aNlp); 564 InitStats(&self->aNlp);
565 InitStats(&self->rerl); 565 InitStats(&self->rerl);
566 } 566 }
567 567
568 static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { 568 static float CalculatePower(const float* in, size_t num_samples) {
569 // Do the energy calculation in the frequency domain. The FFT is performed on 569 size_t k;
570 // a segment of PART_LEN2 samples due to overlap, but we only want the energy 570 float energy = 0.0f;
571 // of half that data (the last PART_LEN samples). Parseval's relation states
572 // that the energy is preserved according to
573 //
574 // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
575 // = ENERGY,
576 //
577 // where N = PART_LEN2. Since we are only interested in calculating the energy
578 // for the last PART_LEN samples we approximate by calculating ENERGY and
579 // divide by 2,
580 //
581 // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
582 //
583 // Since we deal with real valued time domain signals we only store frequency
584 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
585 // need to add the contribution from the missing part in
586 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
587 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
588 // is the values in the for loop below, but multiplication by 2 and division
589 // by 2 cancel.
590 571
591 // TODO(bjornv): Investigate reusing energy calculations performed at other 572 for (k = 0; k < num_samples; ++k) {
592 // places in the code. 573 energy += in[k] * in[k];
593 int k = 1; 574 }
594 // Imaginary parts are zero at end points and left out of the calculation. 575 return energy / num_samples;
595 float energy = (in[0][0] * in[0][0]) / 2; 576 }
596 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
597 577
598 for (k = 1; k < PART_LEN; k++) { 578 static void UpdateLevel(PowerLevel* level, float energy) {
599 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
600 }
601 energy /= PART_LEN2;
602
603 level->sfrsum += energy; 579 level->sfrsum += energy;
604 level->sfrcounter++; 580 level->sfrcounter++;
605 581
606 if (level->sfrcounter > subCountLen) { 582 if (level->sfrcounter > subCountLen) {
607 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); 583 level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
608 level->sfrsum = 0; 584 level->sfrsum = 0;
609 level->sfrcounter = 0; 585 level->sfrcounter = 0;
610 if (level->framelevel > 0) { 586 if (level->framelevel > 0) {
611 if (level->framelevel < level->minlevel) { 587 if (level->framelevel < level->minlevel) {
612 level->minlevel = level->framelevel; // New minimum. 588 level->minlevel = level->framelevel; // New minimum.
(...skipping 10 matching lines...) Expand all
623 } 599 }
624 } 600 }
625 } 601 }
626 602
627 static void UpdateMetrics(AecCore* aec) { 603 static void UpdateMetrics(AecCore* aec) {
628 float dtmp, dtmp2; 604 float dtmp, dtmp2;
629 605
630 const float actThresholdNoisy = 8.0f; 606 const float actThresholdNoisy = 8.0f;
631 const float actThresholdClean = 40.0f; 607 const float actThresholdClean = 40.0f;
632 const float safety = 0.99995f; 608 const float safety = 0.99995f;
633 const float noisyPower = 300000.0f; 609
610 // To make noisyPower consistent with the legacy code, a factor of
611 // 2.0f / PART_LEN2 is applied to noisyPower, since the legacy code uses
612 // the energy of a frame as the audio levels, while the new code uses
613 // a per-sample energy (i.e., power).
614 const float noisyPower = 300000.0f * 2.0f / PART_LEN2;
634 615
635 float actThreshold; 616 float actThreshold;
636 float echo, suppressedEcho; 617 float echo, suppressedEcho;
637 618
638 if (aec->echoState) { // Check if echo is likely present 619 if (aec->echoState) { // Check if echo is likely present
639 aec->stateCounter++; 620 aec->stateCounter++;
640 } 621 }
641 622
642 if (aec->farlevel.frcounter == 0) { 623 if (aec->farlevel.frcounter == 0) {
643 624
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
839 freq_data[1][0] = 0; 820 freq_data[1][0] = 0;
840 freq_data[1][PART_LEN] = 0; 821 freq_data[1][PART_LEN] = 0;
841 freq_data[0][0] = time_data[0]; 822 freq_data[0][0] = time_data[0];
842 freq_data[0][PART_LEN] = time_data[1]; 823 freq_data[0][PART_LEN] = time_data[1];
843 for (i = 1; i < PART_LEN; i++) { 824 for (i = 1; i < PART_LEN; i++) {
844 freq_data[0][i] = time_data[2 * i]; 825 freq_data[0][i] = time_data[2 * i];
845 freq_data[1][i] = time_data[2 * i + 1]; 826 freq_data[1][i] = time_data[2 * i + 1];
846 } 827 }
847 } 828 }
848 829
849
850 static int SignalBasedDelayCorrection(AecCore* self) { 830 static int SignalBasedDelayCorrection(AecCore* self) {
851 int delay_correction = 0; 831 int delay_correction = 0;
852 int last_delay = -2; 832 int last_delay = -2;
853 assert(self != NULL); 833 assert(self != NULL);
854 #if !defined(WEBRTC_ANDROID) 834 #if !defined(WEBRTC_ANDROID)
855 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This 835 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This
856 // is to let the delay estimation get a chance to converge. Also, if the 836 // is to let the delay estimation get a chance to converge. Also, if the
857 // playout audio volume is low (or even muted) the delay estimation can return 837 // playout audio volume is low (or even muted) the delay estimation can return
858 // a very large delay, which will break the AEC if it is applied. 838 // a very large delay, which will break the AEC if it is applied.
859 if (self->frame_count < kDelayCorrectionStart) { 839 if (self->frame_count < kDelayCorrectionStart) {
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
972 Fft(e_extended, e_fft); 952 Fft(e_extended, e_fft);
973 953
974 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, 954 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
975 &e_fft[0][0], 955 &e_fft[0][0],
976 sizeof(e_fft[0][0]) * PART_LEN1 * 2); 956 sizeof(e_fft[0][0]) * PART_LEN1 * 2);
977 957
978 if (metrics_mode == 1) { 958 if (metrics_mode == 1) {
979 // Note that the first PART_LEN samples in fft (before transformation) are 959 // Note that the first PART_LEN samples in fft (before transformation) are
980 // zero. Hence, the scaling by two in UpdateLevel() should not be 960 // zero. Hence, the scaling by two in UpdateLevel() should not be
981 // performed. That scaling is taken care of in UpdateMetrics() instead. 961 // performed. That scaling is taken care of in UpdateMetrics() instead.
982 UpdateLevel(linout_level, e_fft); 962 UpdateLevel(linout_level, CalculatePower(e, PART_LEN) / 2.0f);
983 } 963 }
984 964
985 // Scale error signal inversely with far power. 965 // Scale error signal inversely with far power.
986 WebRtcAec_ScaleErrorSignal(extended_filter_enabled, 966 WebRtcAec_ScaleErrorSignal(extended_filter_enabled,
987 normal_mu, 967 normal_mu,
988 normal_error_threshold, 968 normal_error_threshold,
989 x_pow, 969 x_pow,
990 e_fft); 970 e_fft);
991 WebRtcAec_FilterAdaptation(num_partitions, 971 WebRtcAec_FilterAdaptation(num_partitions,
992 x_fft_buf_block_pos, 972 x_fft_buf_block_pos,
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
1164 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; 1144 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
1165 } else { 1145 } else {
1166 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; 1146 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
1167 } 1147 }
1168 1148
1169 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); 1149 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
1170 1150
1171 // Add comfort noise. 1151 // Add comfort noise.
1172 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); 1152 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
1173 1153
1154 // Inverse error fft.
1155 ScaledInverseFft(efw, fft, 2.0f, 1);
1156
1174 // TODO(bjornv): Investigate how to take the windowing below into account if 1157 // TODO(bjornv): Investigate how to take the windowing below into account if
1175 // needed. 1158 // needed.
1176 if (aec->metricsMode == 1) { 1159 if (aec->metricsMode == 1) {
1177 // Note that we have a scaling by two in the time domain |eBuf|. 1160 // Note that we have a scaling by two in the time domain |eBuf|.
1178 // In addition the time domain signal is windowed before transformation, 1161 // In addition the time domain signal is windowed before transformation,
1179 // losing half the energy on the average. We take care of the first 1162 // losing half the energy on the average. We take care of the first
1180 // scaling only in UpdateMetrics(). 1163 // scaling only in UpdateMetrics().
1181 UpdateLevel(&aec->nlpoutlevel, efw); 1164 UpdateLevel(&aec->nlpoutlevel, CalculatePower(fft, PART_LEN2));
1182 } 1165 }
1183 1166
1184 // Inverse error fft.
1185 ScaledInverseFft(efw, fft, 2.0f, 1);
1186
1187 // Overlap and add to obtain output. 1167 // Overlap and add to obtain output.
1188 for (i = 0; i < PART_LEN; i++) { 1168 for (i = 0; i < PART_LEN; i++) {
1189 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + 1169 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +
1190 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); 1170 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);
1191 1171
1192 // Saturate output to keep it in the allowed range. 1172 // Saturate output to keep it in the allowed range.
1193 output[i] = WEBRTC_SPL_SAT( 1173 output[i] = WEBRTC_SPL_SAT(
1194 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); 1174 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);
1195 } 1175 }
1196 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); 1176 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
1301 1281
1302 // Convert far-end signal to the frequency domain. 1282 // Convert far-end signal to the frequency domain.
1303 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); 1283 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);
1304 Fft(fft, xf); 1284 Fft(fft, xf);
1305 xf_ptr = &xf[0][0]; 1285 xf_ptr = &xf[0][0];
1306 1286
1307 // Near fft 1287 // Near fft
1308 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); 1288 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
1309 Fft(fft, df); 1289 Fft(fft, df);
1310 1290
1291 if (aec->metricsMode == 1) {
1292 // Update power levels
1293 UpdateLevel(&aec->farlevel, CalculatePower(farend_ptr, PART_LEN2));
1294 UpdateLevel(&aec->nearlevel, CalculatePower(aec->dBuf, PART_LEN2));
1295 }
1296
1311 // Power smoothing 1297 // Power smoothing
1312 for (i = 0; i < PART_LEN1; i++) { 1298 for (i = 0; i < PART_LEN1; i++) {
1313 far_spectrum = (xf_ptr[i] * xf_ptr[i]) + 1299 far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
1314 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); 1300 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
1315 aec->xPow[i] = 1301 aec->xPow[i] =
1316 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; 1302 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
1317 // Calculate absolute spectra 1303 // Calculate absolute spectra
1318 abs_far_spectrum[i] = sqrtf(far_spectrum); 1304 abs_far_spectrum[i] = sqrtf(far_spectrum);
1319 1305
1320 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; 1306 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
1398 aec->wfBuf, 1384 aec->wfBuf,
1399 &aec->linoutlevel, 1385 &aec->linoutlevel,
1400 echo_subtractor_output); 1386 echo_subtractor_output);
1401 1387
1402 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); 1388 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
1403 1389
1404 // Perform echo suppression. 1390 // Perform echo suppression.
1405 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); 1391 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);
1406 1392
1407 if (aec->metricsMode == 1) { 1393 if (aec->metricsMode == 1) {
1408 // Update power levels and echo metrics
1409 UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
1410 UpdateLevel(&aec->nearlevel, df);
1411 UpdateMetrics(aec); 1394 UpdateMetrics(aec);
1412 } 1395 }
1413 1396
1414 // Store the output block. 1397 // Store the output block.
1415 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); 1398 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
1416 // For high bands 1399 // For high bands
1417 for (i = 0; i < aec->num_bands - 1; ++i) { 1400 for (i = 0; i < aec->num_bands - 1; ++i) {
1418 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); 1401 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
1419 } 1402 }
1420 1403
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after
1941 int WebRtcAec_extended_filter_enabled(AecCore* self) { 1924 int WebRtcAec_extended_filter_enabled(AecCore* self) {
1942 return self->extended_filter_enabled; 1925 return self->extended_filter_enabled;
1943 } 1926 }
1944 1927
1945 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } 1928 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
1946 1929
1947 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { 1930 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
1948 assert(delay >= 0); 1931 assert(delay >= 0);
1949 self->system_delay = delay; 1932 self->system_delay = delay;
1950 } 1933 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698