webrtc/modules/audio_processing/aec/aec_core.c - Issue 1542573002: Calculate audio levels in AEC in time domain.

Side by Side Diff: webrtc/modules/audio_processing/aec/aec_core.c

Issue 1542573002: Calculate audio levels in AEC in time domain. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: normalizing |noisePower| Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 547 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
558 InitLevel(&self->nearlevel);	558 InitLevel(&self->nearlevel);

559 InitLevel(&self->linoutlevel);	559 InitLevel(&self->linoutlevel);

560 InitLevel(&self->nlpoutlevel);	560 InitLevel(&self->nlpoutlevel);

561	561

562 InitStats(&self->erl);	562 InitStats(&self->erl);

563 InitStats(&self->erle);	563 InitStats(&self->erle);

564 InitStats(&self->aNlp);	564 InitStats(&self->aNlp);

565 InitStats(&self->rerl);	565 InitStats(&self->rerl);

566 }	566 }

567	567

568 static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {	568 static float CalculatePower(const float* in, size_t num_samples) {

569 // Do the energy calculation in the frequency domain. The FFT is performed on	569 size_t k;

570 // a segment of PART_LEN2 samples due to overlap, but we only want the energy	570 float energy = 0.0f;

571 // of half that data (the last PART_LEN samples). Parseval's relation states

572 // that the energy is preserved according to

573 //

574 // \sum_{n=0}^{N-1} \|x(n)\|^2 = 1/N * \sum_{n=0}^{N-1} \|X(n)\|^2

575 // = ENERGY,

576 //

577 // where N = PART_LEN2. Since we are only interested in calculating the energy

578 // for the last PART_LEN samples we approximate by calculating ENERGY and

579 // divide by 2,

580 //

581 // \sum_{n=N/2}^{N-1} \|x(n)\|^2 ~= ENERGY / 2

582 //

583 // Since we deal with real valued time domain signals we only store frequency

584 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we

585 // need to add the contribution from the missing part in

586 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical

587 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This

588 // is the values in the for loop below, but multiplication by 2 and division

589 // by 2 cancel.

590	571

591 // TODO(bjornv): Investigate reusing energy calculations performed at other	572 for (k = 0; k < num_samples; ++k) {

592 // places in the code.	573 energy += in[k] * in[k];

593 int k = 1;	574 }

594 // Imaginary parts are zero at end points and left out of the calculation.	575 return energy / num_samples;

595 float energy = (in[0][0] * in[0][0]) / 2;	576 }

596 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;

597	577

598 for (k = 1; k < PART_LEN; k++) {	578 static void UpdateLevel(PowerLevel* level, float energy) {

599 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);

600 }

601 energy /= PART_LEN2;

602

603 level->sfrsum += energy;	579 level->sfrsum += energy;

604 level->sfrcounter++;	580 level->sfrcounter++;

605	581

606 if (level->sfrcounter > subCountLen) {	582 if (level->sfrcounter > subCountLen) {

607 level->framelevel = level->sfrsum / (subCountLen * PART_LEN);	583 level->framelevel = level->sfrsum / (subCountLen * PART_LEN);

608 level->sfrsum = 0;	584 level->sfrsum = 0;

609 level->sfrcounter = 0;	585 level->sfrcounter = 0;

610 if (level->framelevel > 0) {	586 if (level->framelevel > 0) {

611 if (level->framelevel < level->minlevel) {	587 if (level->framelevel < level->minlevel) {

612 level->minlevel = level->framelevel; // New minimum.	588 level->minlevel = level->framelevel; // New minimum.

(...skipping 10 matching lines...) Expand all Loading...
623 }	599 }

624 }	600 }

625 }	601 }

626	602

627 static void UpdateMetrics(AecCore* aec) {	603 static void UpdateMetrics(AecCore* aec) {

628 float dtmp, dtmp2;	604 float dtmp, dtmp2;

629	605

630 const float actThresholdNoisy = 8.0f;	606 const float actThresholdNoisy = 8.0f;

631 const float actThresholdClean = 40.0f;	607 const float actThresholdClean = 40.0f;

632 const float safety = 0.99995f;	608 const float safety = 0.99995f;

633 const float noisyPower = 300000.0f;	609

	610 // To make noisyPower consistent with the legacy code, a factor of

	611 // 2.0f / PART_LEN2 is applied to noisyPower, since the legacy code uses

	612 // the energy of a frame as the audio levels, while the new code uses

	613 // a per-sample energy (i.e., power).

	614 const float noisyPower = 300000.0f * 2.0f / PART_LEN2;

634	615

635 float actThreshold;	616 float actThreshold;

636 float echo, suppressedEcho;	617 float echo, suppressedEcho;

637	618

638 if (aec->echoState) { // Check if echo is likely present	619 if (aec->echoState) { // Check if echo is likely present

639 aec->stateCounter++;	620 aec->stateCounter++;

640 }	621 }

641	622

642 if (aec->farlevel.frcounter == 0) {	623 if (aec->farlevel.frcounter == 0) {

643	624

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
839 freq_data[1][0] = 0;	820 freq_data[1][0] = 0;

840 freq_data[1][PART_LEN] = 0;	821 freq_data[1][PART_LEN] = 0;

841 freq_data[0][0] = time_data[0];	822 freq_data[0][0] = time_data[0];

842 freq_data[0][PART_LEN] = time_data[1];	823 freq_data[0][PART_LEN] = time_data[1];

843 for (i = 1; i < PART_LEN; i++) {	824 for (i = 1; i < PART_LEN; i++) {

844 freq_data[0][i] = time_data[2 * i];	825 freq_data[0][i] = time_data[2 * i];

845 freq_data[1][i] = time_data[2 * i + 1];	826 freq_data[1][i] = time_data[2 * i + 1];

846 }	827 }

847 }	828 }

848	829

849

850 static int SignalBasedDelayCorrection(AecCore* self) {	830 static int SignalBasedDelayCorrection(AecCore* self) {

851 int delay_correction = 0;	831 int delay_correction = 0;

852 int last_delay = -2;	832 int last_delay = -2;

853 assert(self != NULL);	833 assert(self != NULL);

854 #if !defined(WEBRTC_ANDROID)	834 #if !defined(WEBRTC_ANDROID)

855 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This	835 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This

856 // is to let the delay estimation get a chance to converge. Also, if the	836 // is to let the delay estimation get a chance to converge. Also, if the

857 // playout audio volume is low (or even muted) the delay estimation can return	837 // playout audio volume is low (or even muted) the delay estimation can return

858 // a very large delay, which will break the AEC if it is applied.	838 // a very large delay, which will break the AEC if it is applied.

859 if (self->frame_count < kDelayCorrectionStart) {	839 if (self->frame_count < kDelayCorrectionStart) {

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
972 Fft(e_extended, e_fft);	952 Fft(e_extended, e_fft);

973	953

974 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,	954 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,

975 &e_fft[0][0],	955 &e_fft[0][0],

976 sizeof(e_fft[0][0]) * PART_LEN1 * 2);	956 sizeof(e_fft[0][0]) * PART_LEN1 * 2);

977	957

978 if (metrics_mode == 1) {	958 if (metrics_mode == 1) {

979 // Note that the first PART_LEN samples in fft (before transformation) are	959 // Note that the first PART_LEN samples in fft (before transformation) are

980 // zero. Hence, the scaling by two in UpdateLevel() should not be	960 // zero. Hence, the scaling by two in UpdateLevel() should not be

981 // performed. That scaling is taken care of in UpdateMetrics() instead.	961 // performed. That scaling is taken care of in UpdateMetrics() instead.

982 UpdateLevel(linout_level, e_fft);	962 UpdateLevel(linout_level, CalculatePower(e, PART_LEN) / 2.0f);

983 }	963 }

984	964

985 // Scale error signal inversely with far power.	965 // Scale error signal inversely with far power.

986 WebRtcAec_ScaleErrorSignal(extended_filter_enabled,	966 WebRtcAec_ScaleErrorSignal(extended_filter_enabled,

987 normal_mu,	967 normal_mu,

988 normal_error_threshold,	968 normal_error_threshold,

989 x_pow,	969 x_pow,

990 e_fft);	970 e_fft);

991 WebRtcAec_FilterAdaptation(num_partitions,	971 WebRtcAec_FilterAdaptation(num_partitions,

992 x_fft_buf_block_pos,	972 x_fft_buf_block_pos,

(...skipping 171 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1164 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;	1144 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;

1165 } else {	1145 } else {

1166 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;	1146 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;

1167 }	1147 }

1168	1148

1169 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);	1149 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);

1170	1150

1171 // Add comfort noise.	1151 // Add comfort noise.

1172 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);	1152 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);

1173	1153

	1154 // Inverse error fft.

	1155 ScaledInverseFft(efw, fft, 2.0f, 1);

	1156

1174 // TODO(bjornv): Investigate how to take the windowing below into account if	1157 // TODO(bjornv): Investigate how to take the windowing below into account if

1175 // needed.	1158 // needed.

1176 if (aec->metricsMode == 1) {	1159 if (aec->metricsMode == 1) {

1177 // Note that we have a scaling by two in the time domain |eBuf|.	1160 // Note that we have a scaling by two in the time domain |eBuf|.

1178 // In addition the time domain signal is windowed before transformation,	1161 // In addition the time domain signal is windowed before transformation,

1179 // losing half the energy on the average. We take care of the first	1162 // losing half the energy on the average. We take care of the first

1180 // scaling only in UpdateMetrics().	1163 // scaling only in UpdateMetrics().

1181 UpdateLevel(&aec->nlpoutlevel, efw);	1164 UpdateLevel(&aec->nlpoutlevel, CalculatePower(fft, PART_LEN2));

1182 }	1165 }

1183	1166

1184 // Inverse error fft.

1185 ScaledInverseFft(efw, fft, 2.0f, 1);

1186

1187 // Overlap and add to obtain output.	1167 // Overlap and add to obtain output.

1188 for (i = 0; i < PART_LEN; i++) {	1168 for (i = 0; i < PART_LEN; i++) {

1189 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +	1169 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +

1190 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);	1170 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);

1191	1171

1192 // Saturate output to keep it in the allowed range.	1172 // Saturate output to keep it in the allowed range.

1193 output[i] = WEBRTC_SPL_SAT(	1173 output[i] = WEBRTC_SPL_SAT(

1194 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);	1174 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);

1195 }	1175 }

1196 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));	1176 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1301	1281

1302 // Convert far-end signal to the frequency domain.	1282 // Convert far-end signal to the frequency domain.

1303 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);	1283 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);

1304 Fft(fft, xf);	1284 Fft(fft, xf);

1305 xf_ptr = &xf[0][0];	1285 xf_ptr = &xf[0][0];

1306	1286

1307 // Near fft	1287 // Near fft

1308 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);	1288 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);

1309 Fft(fft, df);	1289 Fft(fft, df);

1310	1290

	1291 if (aec->metricsMode == 1) {

	1292 // Update power levels

	1293 UpdateLevel(&aec->farlevel, CalculatePower(farend_ptr, PART_LEN2));

	1294 UpdateLevel(&aec->nearlevel, CalculatePower(aec->dBuf, PART_LEN2));

	1295 }

	1296

1311 // Power smoothing	1297 // Power smoothing

1312 for (i = 0; i < PART_LEN1; i++) {	1298 for (i = 0; i < PART_LEN1; i++) {

1313 far_spectrum = (xf_ptr[i] * xf_ptr[i]) +	1299 far_spectrum = (xf_ptr[i] * xf_ptr[i]) +

1314 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);	1300 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);

1315 aec->xPow[i] =	1301 aec->xPow[i] =

1316 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;	1302 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;

1317 // Calculate absolute spectra	1303 // Calculate absolute spectra

1318 abs_far_spectrum[i] = sqrtf(far_spectrum);	1304 abs_far_spectrum[i] = sqrtf(far_spectrum);

1319	1305

1320 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];	1306 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1398 aec->wfBuf,	1384 aec->wfBuf,

1399 &aec->linoutlevel,	1385 &aec->linoutlevel,

1400 echo_subtractor_output);	1386 echo_subtractor_output);

1401	1387

1402 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);	1388 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);

1403	1389

1404 // Perform echo suppression.	1390 // Perform echo suppression.

1405 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);	1391 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);

1406	1392

1407 if (aec->metricsMode == 1) {	1393 if (aec->metricsMode == 1) {

1408 // Update power levels and echo metrics

1409 UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);

1410 UpdateLevel(&aec->nearlevel, df);

1411 UpdateMetrics(aec);	1394 UpdateMetrics(aec);

1412 }	1395 }

1413	1396

1414 // Store the output block.	1397 // Store the output block.

1415 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);	1398 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);

1416 // For high bands	1399 // For high bands

1417 for (i = 0; i < aec->num_bands - 1; ++i) {	1400 for (i = 0; i < aec->num_bands - 1; ++i) {

1418 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);	1401 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);

1419 }	1402 }

1420	1403

(...skipping 520 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1941 int WebRtcAec_extended_filter_enabled(AecCore* self) {	1924 int WebRtcAec_extended_filter_enabled(AecCore* self) {

1942 return self->extended_filter_enabled;	1925 return self->extended_filter_enabled;

1943 }	1926 }

1944	1927

1945 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }	1928 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }

1946	1929

1947 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {	1930 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {

1948 assert(delay >= 0);	1931 assert(delay >= 0);

1949 self->system_delay = delay;	1932 self->system_delay = delay;

1950 }	1933 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »