webrtc/modules/audio_coding/neteq/expand.cc - Issue 1908623002: Avoiding overflow in cross correlation in NetEq.

Side by Side Diff: webrtc/modules/audio_coding/neteq/expand.cc

Issue 1908623002: Avoiding overflow in cross correlation in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: "turn off ubsan as it was" (created 4 years, 7 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/expand.h"	11 #include "webrtc/modules/audio_coding/neteq/expand.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <string.h> // memset	14 #include <string.h> // memset

15	15

16 #include <algorithm> // min, max	16 #include <algorithm> // min, max

17 #include <limits> // numeric_limits<T>	17 #include <limits> // numeric_limits<T>

18	18

19 #include "webrtc/base/safe_conversions.h"	19 #include "webrtc/base/safe_conversions.h"

20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"	21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

	22 #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"

22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	23 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"	24 #include "webrtc/modules/audio_coding/neteq/random_vector.h"

24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"	25 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"

25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"	26 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"

26	27

27 namespace webrtc {	28 namespace webrtc {

28	29

29 Expand::Expand(BackgroundNoise* background_noise,	30 Expand::Expand(BackgroundNoise* background_noise,

30 SyncBuffer* sync_buffer,	31 SyncBuffer* sync_buffer,

31 RandomVector* random_vector,	32 RandomVector* random_vector,

(...skipping 340 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;	373 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

373	374

374 const size_t signal_length = static_cast<size_t>(256 * fs_mult);	375 const size_t signal_length = static_cast<size_t>(256 * fs_mult);

375 const int16_t* audio_history =	376 const int16_t* audio_history =

376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];	377 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];

377	378

378 // Initialize.	379 // Initialize.

379 InitializeForAnExpandPeriod();	380 InitializeForAnExpandPeriod();

380	381

381 // Calculate correlation in downsampled domain (4 kHz sample rate).	382 // Calculate correlation in downsampled domain (4 kHz sample rate).

382 int correlation_scale;

383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.	383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.

384 // If it is decided to break bit-exactness |correlation_length| should be	384 // If it is decided to break bit-exactness |correlation_length| should be

385 // initialized to the return value of Correlation().	385 // initialized to the return value of Correlation().

386 Correlation(audio_history, signal_length, correlation_vector,	386 Correlation(audio_history, signal_length, correlation_vector);

387 &correlation_scale);

388	387

389 // Find peaks in correlation vector.	388 // Find peaks in correlation vector.

390 DspHelper::PeakDetection(correlation_vector, correlation_length,	389 DspHelper::PeakDetection(correlation_vector, correlation_length,

391 kNumCorrelationCandidates, fs_mult,	390 kNumCorrelationCandidates, fs_mult,

392 best_correlation_index, best_correlation);	391 best_correlation_index, best_correlation);

393	392

394 // Adjust peak locations; cross-correlation lags start at 2.5 ms	393 // Adjust peak locations; cross-correlation lags start at 2.5 ms

395 // (20 * fs_mult samples).	394 // (20 * fs_mult samples).

396 best_correlation_index[0] += fs_mult_20;	395 best_correlation_index[0] += fs_mult_20;

397 best_correlation_index[1] += fs_mult_20;	396 best_correlation_index[1] += fs_mult_20;

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
443 std::max(std::min(distortion_lag + 10, fs_mult_120),	442 std::max(std::min(distortion_lag + 10, fs_mult_120),

444 static_cast<size_t>(60 * fs_mult));	443 static_cast<size_t>(60 * fs_mult));

445	444

446 size_t start_index = std::min(distortion_lag, correlation_lag);	445 size_t start_index = std::min(distortion_lag, correlation_lag);

447 size_t correlation_lags = static_cast<size_t>(	446 size_t correlation_lags = static_cast<size_t>(

448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);	447 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);

449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));	448 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

450	449

451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {	450 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {

452 ChannelParameters& parameters = channel_parameters_[channel_ix];	451 ChannelParameters& parameters = channel_parameters_[channel_ix];

453 // Calculate suitable scaling.

454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(

455 &audio_history[signal_length - correlation_length - start_index

456 - correlation_lags],

457 correlation_length + start_index + correlation_lags - 1);

458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +

459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;

460 correlation_scale = std::max(0, correlation_scale);

461	452

462 // Calculate the correlation, store in |correlation_vector2|.	453 // Calculate the correlation, store in |correlation_vector2|.

463 WebRtcSpl_CrossCorrelation(	454 int correlation_scale = CrossCorrelationWithAutoShift(

464 correlation_vector2,

465 &(audio_history[signal_length - correlation_length]),	455 &(audio_history[signal_length - correlation_length]),

466 &(audio_history[signal_length - correlation_length - start_index]),	456 &(audio_history[signal_length - correlation_length - start_index]),

467 correlation_length, correlation_lags, correlation_scale, -1);	457 correlation_length, correlation_lags, -1, correlation_vector2);

468	458

469 // Find maximizing index.	459 // Find maximizing index.

470 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);	460 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);

471 int32_t max_correlation = correlation_vector2[best_index];	461 int32_t max_correlation = correlation_vector2[best_index];

472 // Compensate index with start offset.	462 // Compensate index with start offset.

473 best_index = best_index + start_index;	463 best_index = best_index + start_index;

474	464

475 // Calculate energies.	465 // Calculate energies.

476 int32_t energy1 = WebRtcSpl_DotProductWithScale(	466 int32_t energy1 = WebRtcSpl_DotProductWithScale(

477 &(audio_history[signal_length - correlation_length]),	467 &(audio_history[signal_length - correlation_length]),

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;	565 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;

576 // Third lag is the average again, but rounding towards |correlation_lag|.	566 // Third lag is the average again, but rounding towards |correlation_lag|.

577 if (distortion_lag > correlation_lag) {	567 if (distortion_lag > correlation_lag) {

578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;	568 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;

579 } else {	569 } else {

580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;	570 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;

581 }	571 }

582 }	572 }

583	573

584 // Calculate the LPC and the gain of the filters.	574 // Calculate the LPC and the gain of the filters.

585 // Calculate scale value needed for auto-correlation.

586 correlation_scale = WebRtcSpl_MaxAbsValueW16(

587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]),

588 fs_mult_lpc_analysis_len);

589

590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);

591 correlation_scale = std::max(correlation_scale * 2 + 7, 0);

592	575

593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.	576 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.

594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -	577 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -

595 kUnvoicedLpcOrder;	578 kUnvoicedLpcOrder;

596 // Copy signal to temporary vector to be able to pad with leading zeros.	579 // Copy signal to temporary vector to be able to pad with leading zeros.

597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len	580 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len

598 + kUnvoicedLpcOrder];	581 + kUnvoicedLpcOrder];

599 memset(temp_signal, 0,	582 memset(temp_signal, 0,

600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));	583 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));

601 memcpy(&temp_signal[kUnvoicedLpcOrder],	584 memcpy(&temp_signal[kUnvoicedLpcOrder],

602 &audio_history[temp_index + kUnvoicedLpcOrder],	585 &audio_history[temp_index + kUnvoicedLpcOrder],

603 sizeof(int16_t) * fs_mult_lpc_analysis_len);	586 sizeof(int16_t) * fs_mult_lpc_analysis_len);

604 WebRtcSpl_CrossCorrelation(auto_correlation,	587 correlation_scale = CrossCorrelationWithAutoShift(

605 &temp_signal[kUnvoicedLpcOrder],	588 &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],

606 &temp_signal[kUnvoicedLpcOrder],	589 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);

607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,

608 correlation_scale, -1);

609 delete [] temp_signal;	590 delete [] temp_signal;

610	591

611 // Verify that variance is positive.	592 // Verify that variance is positive.

612 if (auto_correlation[0] > 0) {	593 if (auto_correlation[0] > 0) {

613 // Estimate AR filter parameters using Levinson-Durbin algorithm;	594 // Estimate AR filter parameters using Levinson-Durbin algorithm;

614 // kUnvoicedLpcOrder + 1 filter coefficients.	595 // kUnvoicedLpcOrder + 1 filter coefficients.

615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,	596 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,

616 parameters.ar_filter,	597 parameters.ar_filter,

617 reflection_coeff,	598 reflection_coeff,

618 kUnvoicedLpcOrder);	599 kUnvoicedLpcOrder);

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
759 voice_mix_factor(0),	740 voice_mix_factor(0),

760 current_voice_mix_factor(0),	741 current_voice_mix_factor(0),

761 onset(false),	742 onset(false),

762 mute_slope(0) {	743 mute_slope(0) {

763 memset(ar_filter, 0, sizeof(ar_filter));	744 memset(ar_filter, 0, sizeof(ar_filter));

764 memset(ar_filter_state, 0, sizeof(ar_filter_state));	745 memset(ar_filter_state, 0, sizeof(ar_filter_state));

765 }	746 }

766	747

767 void Expand::Correlation(const int16_t* input,	748 void Expand::Correlation(const int16_t* input,

768 size_t input_length,	749 size_t input_length,

769 int16_t* output,	750 int16_t* output) const {

770 int* output_scale) const {

771 // Set parameters depending on sample rate.	751 // Set parameters depending on sample rate.

772 const int16_t* filter_coefficients;	752 const int16_t* filter_coefficients;

773 size_t num_coefficients;	753 size_t num_coefficients;

774 int16_t downsampling_factor;	754 int16_t downsampling_factor;

775 if (fs_hz_ == 8000) {	755 if (fs_hz_ == 8000) {

776 num_coefficients = 3;	756 num_coefficients = 3;

777 downsampling_factor = 2;	757 downsampling_factor = 2;

778 filter_coefficients = DspHelper::kDownsample8kHzTbl;	758 filter_coefficients = DspHelper::kDownsample8kHzTbl;

779 } else if (fs_hz_ == 16000) {	759 } else if (fs_hz_ == 16000) {

780 num_coefficients = 5;	760 num_coefficients = 5;

(...skipping 26 matching lines...) Expand all Loading...
807 downsampling_factor, kFilterDelay);	787 downsampling_factor, kFilterDelay);

808	788

809 // Normalize |downsampled_input| to using all 16 bits.	789 // Normalize |downsampled_input| to using all 16 bits.

810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,	790 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,

811 kDownsampledLength);	791 kDownsampledLength);

812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);	792 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);

813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,	793 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,

814 downsampled_input, norm_shift);	794 downsampled_input, norm_shift);

815	795

816 int32_t correlation[kNumCorrelationLags];	796 int32_t correlation[kNumCorrelationLags];

817 static const int kCorrelationShift = 6;	797 CrossCorrelationWithAutoShift(

818 WebRtcSpl_CrossCorrelation(

819 correlation,

820 &downsampled_input[kDownsampledLength - kCorrelationLength],	798 &downsampled_input[kDownsampledLength - kCorrelationLength],

821 &downsampled_input[kDownsampledLength - kCorrelationLength	799 &downsampled_input[kDownsampledLength - kCorrelationLength

822 - kCorrelationStartLag],	800 - kCorrelationStartLag],

823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);	801 kCorrelationLength, kNumCorrelationLags, -1, correlation);

824	802

825 // Normalize and move data from 32-bit to 16-bit vector.	803 // Normalize and move data from 32-bit to 16-bit vector.

826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,	804 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,

827 kNumCorrelationLags);	805 kNumCorrelationLags);

828 int16_t norm_shift2 = static_cast<int16_t>(	806 int16_t norm_shift2 = static_cast<int16_t>(

829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));	807 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));

830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,	808 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,

831 norm_shift2);	809 norm_shift2);

832 // Total scale factor (right shifts) of correlation value.

833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;

834 }	810 }

835	811

836 void Expand::UpdateLagIndex() {	812 void Expand::UpdateLagIndex() {

837 current_lag_index_ = current_lag_index_ + lag_index_direction_;	813 current_lag_index_ = current_lag_index_ + lag_index_direction_;

838 // Change direction if needed.	814 // Change direction if needed.

839 if (current_lag_index_ <= 0) {	815 if (current_lag_index_ <= 0) {

840 lag_index_direction_ = 1;	816 lag_index_direction_ = 1;

841 }	817 }

842 if (current_lag_index_ >= kNumLags - 1) {	818 if (current_lag_index_ >= kNumLags - 1) {

843 lag_index_direction_ = -1;	819 lag_index_direction_ = -1;

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;	929 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;

954 while (samples_generated < length) {	930 while (samples_generated < length) {

955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);	931 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);

956 random_vector_->IncreaseSeedIncrement(seed_increment);	932 random_vector_->IncreaseSeedIncrement(seed_increment);

957 random_vector_->Generate(rand_length, &random_vector[samples_generated]);	933 random_vector_->Generate(rand_length, &random_vector[samples_generated]);

958 samples_generated += rand_length;	934 samples_generated += rand_length;

959 }	935 }

960 }	936 }

961	937

962 } // namespace webrtc	938 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | webrtc/modules/audio_coding/neteq/merge.h » ('j') | no next file with comments »