webrtc/modules/audio_coding/neteq/expand.cc - Issue 1931933004: Reland "Avoiding overflow in cross correlation in NetEq."

Side by Side Diff: webrtc/modules/audio_coding/neteq/expand.cc

Issue 1931933004: Reland "Avoiding overflow in cross correlation in NetEq." (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: fixing bitexactness tests Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/expand.h"	11 #include "webrtc/modules/audio_coding/neteq/expand.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <string.h> // memset	14 #include <string.h> // memset

15	15

16 #include <algorithm> // min, max	16 #include <algorithm> // min, max

17 #include <limits> // numeric_limits<T>	17 #include <limits> // numeric_limits<T>

18	18

19 #include "webrtc/base/safe_conversions.h"	19 #include "webrtc/base/safe_conversions.h"

20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"	21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

	22 #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"

22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	23 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"	24 #include "webrtc/modules/audio_coding/neteq/random_vector.h"

24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"	25 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"

25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"	26 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"

26	27

27 namespace webrtc {	28 namespace webrtc {

28	29

29 Expand::Expand(BackgroundNoise* background_noise,	30 Expand::Expand(BackgroundNoise* background_noise,

30 SyncBuffer* sync_buffer,	31 SyncBuffer* sync_buffer,

31 RandomVector* random_vector,	32 RandomVector* random_vector,

(...skipping 340 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;	373 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

373	374

374 const size_t signal_length = static_cast<size_t>(256 * fs_mult);	375 const size_t signal_length = static_cast<size_t>(256 * fs_mult);

375 const int16_t* audio_history =	376 const int16_t* audio_history =

376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];	377 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];

377	378

378 // Initialize.	379 // Initialize.

379 InitializeForAnExpandPeriod();	380 InitializeForAnExpandPeriod();

380	381

381 // Calculate correlation in downsampled domain (4 kHz sample rate).	382 // Calculate correlation in downsampled domain (4 kHz sample rate).

382 int correlation_scale;

383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.	383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.

384 // If it is decided to break bit-exactness |correlation_length| should be	384 // If it is decided to break bit-exactness |correlation_length| should be

385 // initialized to the return value of Correlation().	385 // initialized to the return value of Correlation().

386 Correlation(audio_history, signal_length, correlation_vector,	386 Correlation(audio_history, signal_length, correlation_vector);

387 &correlation_scale);

388	387

389 // Find peaks in correlation vector.	388 // Find peaks in correlation vector.

390 DspHelper::PeakDetection(correlation_vector, correlation_length,	389 DspHelper::PeakDetection(correlation_vector, correlation_length,

391 kNumCorrelationCandidates, fs_mult,	390 kNumCorrelationCandidates, fs_mult,

392 best_correlation_index, best_correlation);	391 best_correlation_index, best_correlation);

393	392

394 // Adjust peak locations; cross-correlation lags start at 2.5 ms	393 // Adjust peak locations; cross-correlation lags start at 2.5 ms

395 // (20 * fs_mult samples).	394 // (20 * fs_mult samples).

396 best_correlation_index[0] += fs_mult_20;	395 best_correlation_index[0] += fs_mult_20;

397 best_correlation_index[1] += fs_mult_20;	396 best_correlation_index[1] += fs_mult_20;

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);	447 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);

449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));	448 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

450	449

451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {	450 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {

452 ChannelParameters& parameters = channel_parameters_[channel_ix];	451 ChannelParameters& parameters = channel_parameters_[channel_ix];

453 // Calculate suitable scaling.	452 // Calculate suitable scaling.

454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(	453 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(

455 &audio_history[signal_length - correlation_length - start_index	454 &audio_history[signal_length - correlation_length - start_index

456 - correlation_lags],	455 - correlation_lags],

457 correlation_length + start_index + correlation_lags - 1);	456 correlation_length + start_index + correlation_lags - 1);

458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +	457 int correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +

459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;	458 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;

460 correlation_scale = std::max(0, correlation_scale);	459 correlation_scale = std::max(0, correlation_scale);

461	460

462 // Calculate the correlation, store in |correlation_vector2|.	461 // Calculate the correlation, store in |correlation_vector2|.

463 WebRtcSpl_CrossCorrelation(	462 WebRtcSpl_CrossCorrelation(

464 correlation_vector2,	463 correlation_vector2,

465 &(audio_history[signal_length - correlation_length]),	464 &(audio_history[signal_length - correlation_length]),

466 &(audio_history[signal_length - correlation_length - start_index]),	465 &(audio_history[signal_length - correlation_length - start_index]),

467 correlation_length, correlation_lags, correlation_scale, -1);	466 correlation_length, correlation_lags, correlation_scale, -1);

468	467

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;	574 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;

576 // Third lag is the average again, but rounding towards |correlation_lag|.	575 // Third lag is the average again, but rounding towards |correlation_lag|.

577 if (distortion_lag > correlation_lag) {	576 if (distortion_lag > correlation_lag) {

578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;	577 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;

579 } else {	578 } else {

580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;	579 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;

581 }	580 }

582 }	581 }

583	582

584 // Calculate the LPC and the gain of the filters.	583 // Calculate the LPC and the gain of the filters.

585 // Calculate scale value needed for auto-correlation.

586 correlation_scale = WebRtcSpl_MaxAbsValueW16(

587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]),

588 fs_mult_lpc_analysis_len);

589

590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);

591 correlation_scale = std::max(correlation_scale * 2 + 7, 0);

592	584

593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.	585 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.

594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -	586 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -

595 kUnvoicedLpcOrder;	587 kUnvoicedLpcOrder;

596 // Copy signal to temporary vector to be able to pad with leading zeros.	588 // Copy signal to temporary vector to be able to pad with leading zeros.

597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len	589 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len

598 + kUnvoicedLpcOrder];	590 + kUnvoicedLpcOrder];

599 memset(temp_signal, 0,	591 memset(temp_signal, 0,

600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));	592 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));

601 memcpy(&temp_signal[kUnvoicedLpcOrder],	593 memcpy(&temp_signal[kUnvoicedLpcOrder],

602 &audio_history[temp_index + kUnvoicedLpcOrder],	594 &audio_history[temp_index + kUnvoicedLpcOrder],

603 sizeof(int16_t) * fs_mult_lpc_analysis_len);	595 sizeof(int16_t) * fs_mult_lpc_analysis_len);

604 WebRtcSpl_CrossCorrelation(auto_correlation,	596 CrossCorrelationWithAutoShift(

605 &temp_signal[kUnvoicedLpcOrder],	597 &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],

606 &temp_signal[kUnvoicedLpcOrder],	598 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);

607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,

608 correlation_scale, -1);

609 delete [] temp_signal;	599 delete [] temp_signal;

610	600

611 // Verify that variance is positive.	601 // Verify that variance is positive.

612 if (auto_correlation[0] > 0) {	602 if (auto_correlation[0] > 0) {

613 // Estimate AR filter parameters using Levinson-Durbin algorithm;	603 // Estimate AR filter parameters using Levinson-Durbin algorithm;

614 // kUnvoicedLpcOrder + 1 filter coefficients.	604 // kUnvoicedLpcOrder + 1 filter coefficients.

615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,	605 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,

616 parameters.ar_filter,	606 parameters.ar_filter,

617 reflection_coeff,	607 reflection_coeff,

618 kUnvoicedLpcOrder);	608 kUnvoicedLpcOrder);

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
759 voice_mix_factor(0),	749 voice_mix_factor(0),

760 current_voice_mix_factor(0),	750 current_voice_mix_factor(0),

761 onset(false),	751 onset(false),

762 mute_slope(0) {	752 mute_slope(0) {

763 memset(ar_filter, 0, sizeof(ar_filter));	753 memset(ar_filter, 0, sizeof(ar_filter));

764 memset(ar_filter_state, 0, sizeof(ar_filter_state));	754 memset(ar_filter_state, 0, sizeof(ar_filter_state));

765 }	755 }

766	756

767 void Expand::Correlation(const int16_t* input,	757 void Expand::Correlation(const int16_t* input,

768 size_t input_length,	758 size_t input_length,

769 int16_t* output,	759 int16_t* output) const {

770 int* output_scale) const {

771 // Set parameters depending on sample rate.	760 // Set parameters depending on sample rate.

772 const int16_t* filter_coefficients;	761 const int16_t* filter_coefficients;

773 size_t num_coefficients;	762 size_t num_coefficients;

774 int16_t downsampling_factor;	763 int16_t downsampling_factor;

775 if (fs_hz_ == 8000) {	764 if (fs_hz_ == 8000) {

776 num_coefficients = 3;	765 num_coefficients = 3;

777 downsampling_factor = 2;	766 downsampling_factor = 2;

778 filter_coefficients = DspHelper::kDownsample8kHzTbl;	767 filter_coefficients = DspHelper::kDownsample8kHzTbl;

779 } else if (fs_hz_ == 16000) {	768 } else if (fs_hz_ == 16000) {

780 num_coefficients = 5;	769 num_coefficients = 5;

(...skipping 26 matching lines...) Expand all Loading...
807 downsampling_factor, kFilterDelay);	796 downsampling_factor, kFilterDelay);

808	797

809 // Normalize |downsampled_input| to using all 16 bits.	798 // Normalize |downsampled_input| to using all 16 bits.

810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,	799 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,

811 kDownsampledLength);	800 kDownsampledLength);

812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);	801 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);

813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,	802 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,

814 downsampled_input, norm_shift);	803 downsampled_input, norm_shift);

815	804

816 int32_t correlation[kNumCorrelationLags];	805 int32_t correlation[kNumCorrelationLags];

817 static const int kCorrelationShift = 6;	806 CrossCorrelationWithAutoShift(

818 WebRtcSpl_CrossCorrelation(

819 correlation,

820 &downsampled_input[kDownsampledLength - kCorrelationLength],	807 &downsampled_input[kDownsampledLength - kCorrelationLength],

821 &downsampled_input[kDownsampledLength - kCorrelationLength	808 &downsampled_input[kDownsampledLength - kCorrelationLength

822 - kCorrelationStartLag],	809 - kCorrelationStartLag],

823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);	810 kCorrelationLength, kNumCorrelationLags, -1, correlation);

824	811

825 // Normalize and move data from 32-bit to 16-bit vector.	812 // Normalize and move data from 32-bit to 16-bit vector.

826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,	813 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,

827 kNumCorrelationLags);	814 kNumCorrelationLags);

828 int16_t norm_shift2 = static_cast<int16_t>(	815 int16_t norm_shift2 = static_cast<int16_t>(

829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));	816 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));

830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,	817 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,

831 norm_shift2);	818 norm_shift2);

832 // Total scale factor (right shifts) of correlation value.

833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;

834 }	819 }

835	820

836 void Expand::UpdateLagIndex() {	821 void Expand::UpdateLagIndex() {

837 current_lag_index_ = current_lag_index_ + lag_index_direction_;	822 current_lag_index_ = current_lag_index_ + lag_index_direction_;

838 // Change direction if needed.	823 // Change direction if needed.

839 if (current_lag_index_ <= 0) {	824 if (current_lag_index_ <= 0) {

840 lag_index_direction_ = 1;	825 lag_index_direction_ = 1;

841 }	826 }

842 if (current_lag_index_ >= kNumLags - 1) {	827 if (current_lag_index_ >= kNumLags - 1) {

843 lag_index_direction_ = -1;	828 lag_index_direction_ = -1;

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;	938 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;

954 while (samples_generated < length) {	939 while (samples_generated < length) {

955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);	940 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);

956 random_vector_->IncreaseSeedIncrement(seed_increment);	941 random_vector_->IncreaseSeedIncrement(seed_increment);

957 random_vector_->Generate(rand_length, &random_vector[samples_generated]);	942 random_vector_->Generate(rand_length, &random_vector[samples_generated]);

958 samples_generated += rand_length;	943 samples_generated += rand_length;

959 }	944 }

960 }	945 }

961	946

962 } // namespace webrtc	947 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | webrtc/modules/audio_coding/neteq/merge.h » ('j') | no next file with comments »