webrtc/modules/audio_coding/neteq/expand.cc - Issue 1908623002: Avoiding overflow in cross correlation in NetEq.

Side by Side Diff: webrtc/modules/audio_coding/neteq/expand.cc

Issue 1908623002: Avoiding overflow in cross correlation in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: on comments Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/expand.h"	11 #include "webrtc/modules/audio_coding/neteq/expand.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <string.h> // memset	14 #include <string.h> // memset

15	15

16 #include <algorithm> // min, max	16 #include <algorithm> // min, max

17 #include <limits> // numeric_limits<T>	17 #include <limits> // numeric_limits<T>

18	18

19 #include "webrtc/base/safe_conversions.h"	19 #include "webrtc/base/safe_conversions.h"

20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"	21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"	23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"

24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"	24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"

25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"	25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"

26	26

27 namespace webrtc {	27 namespace webrtc {

28	28

	29 namespace {

	30

	31 // This function decides the overflow-protecting scaling and call

	32 // WebRtcSpl_CrossCorrelation.

	33 void CrossCorrelation(int32_t* cross_correlation,

	34 const int16_t* sequence_1,

	35 const int16_t* sequence_2,

	36 size_t sequence_1_length,

	37 size_t cross_correlation_length,

	38 int* right_shifts,

	39 int cross_correlation_step) {

	40 // Find the maximum absolute value of sequence_1 and 2.

	41 const int16_t max_1 = WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length);

	42 const int sequence_2_shift =

	43 cross_correlation_step * (cross_correlation_length - 1);

	44 const int16_t* sequence_2_start =

	45 sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift;

	46 const size_t sequence_2_length = sequence_1_length + abs(sequence_2_shift);

	47 const int16_t max_2 =

	48 WebRtcSpl_MaxAbsValueW16(sequence_2_start, sequence_2_length);

	49

	50 // In order to avoid overflow when computing the sum we should scale the

	51 // samples so that (in_vector_length * max_1 * max_2) will not overflow.

	52 // Expected scaling fulfills

	53 // 1) sufficient:

	54 // sequence_1_length * (max_1 * max_2 >> scaling) <= 0x7fffffff;

	55 // 2) necessary:

	56 // if (scaling > 0)

	57 // sequence_1_length * (max_1 * max_2 >> (scaling - 1)) > 0x7fffffff;

	58 // The following calculation fulfills 1) and almost fulfills 2).

	59 // There are some corner cases that 2) is not satisfied, e.g.,

	60 // max_1 = 17, max_2 = 30848, sequence_1_length = 4095, in such case,

	61 // optimal scaling is 0, while the following calculation results in 1.

	62 const int32_t factor = max_1 * max_2 / (std::numeric_limits<int32_t>::max() /
	hlundin-webrtc 2016/04/22 06:48:56 Even though it is true that A*B/C will be evaluate Even though it is true that A*B/C will be evaluated as (A*B)/C, I think you can help the reader by explicitly adding the parentheses around max_1*max_2. minyue-webrtc 2016/04/22 13:58:30 Done. Show quoted text On 2016/04/22 06:48:56, hlundin-webrtc wrote: > Even though it is true that A*B/C will be evaluated as (A*B)/C, I think you can > help the reader by explicitly adding the parentheses around max_1*max_2. Done.
	63 static_cast<int32_t>(sequence_1_length));

	64 const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);

	65

	66 assert((double)max_1 * max_2 * sequence_1_length / (1 << scaling) <=

	67 WEBRTC_SPL_WORD32_MAX);

	68 assert(scaling == 0 ||

	69 (double)max_1 * max_2 * sequence_1_length /(1 << scaling) * 2 >

	70 WEBRTC_SPL_WORD32_MAX);

	71

	72 WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,

	73 sequence_1_length, cross_correlation_length,

	74 scaling, cross_correlation_step);

	75 if (right_shifts)

	76 *right_shifts = scaling;

	77 }

	78

	79 } // namespace

	80

29 Expand::Expand(BackgroundNoise* background_noise,	81 Expand::Expand(BackgroundNoise* background_noise,

30 SyncBuffer* sync_buffer,	82 SyncBuffer* sync_buffer,

31 RandomVector* random_vector,	83 RandomVector* random_vector,

32 StatisticsCalculator* statistics,	84 StatisticsCalculator* statistics,

33 int fs,	85 int fs,

34 size_t num_channels)	86 size_t num_channels)

35 : random_vector_(random_vector),	87 : random_vector_(random_vector),

36 sync_buffer_(sync_buffer),	88 sync_buffer_(sync_buffer),

37 first_expand_(true),	89 first_expand_(true),

38 fs_hz_(fs),	90 fs_hz_(fs),

(...skipping 333 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;	424 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

373	425

374 const size_t signal_length = static_cast<size_t>(256 * fs_mult);	426 const size_t signal_length = static_cast<size_t>(256 * fs_mult);

375 const int16_t* audio_history =	427 const int16_t* audio_history =

376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];	428 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];

377	429

378 // Initialize.	430 // Initialize.

379 InitializeForAnExpandPeriod();	431 InitializeForAnExpandPeriod();

380	432

381 // Calculate correlation in downsampled domain (4 kHz sample rate).	433 // Calculate correlation in downsampled domain (4 kHz sample rate).

382 int correlation_scale;

383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.	434 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.

384 // If it is decided to break bit-exactness |correlation_length| should be	435 // If it is decided to break bit-exactness |correlation_length| should be

385 // initialized to the return value of Correlation().	436 // initialized to the return value of Correlation().

386 Correlation(audio_history, signal_length, correlation_vector,	437 Correlation(audio_history, signal_length, correlation_vector);

387 &correlation_scale);

388	438

389 // Find peaks in correlation vector.	439 // Find peaks in correlation vector.

390 DspHelper::PeakDetection(correlation_vector, correlation_length,	440 DspHelper::PeakDetection(correlation_vector, correlation_length,

391 kNumCorrelationCandidates, fs_mult,	441 kNumCorrelationCandidates, fs_mult,

392 best_correlation_index, best_correlation);	442 best_correlation_index, best_correlation);

393	443

394 // Adjust peak locations; cross-correlation lags start at 2.5 ms	444 // Adjust peak locations; cross-correlation lags start at 2.5 ms

395 // (20 * fs_mult samples).	445 // (20 * fs_mult samples).

396 best_correlation_index[0] += fs_mult_20;	446 best_correlation_index[0] += fs_mult_20;

397 best_correlation_index[1] += fs_mult_20;	447 best_correlation_index[1] += fs_mult_20;

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
443 std::max(std::min(distortion_lag + 10, fs_mult_120),	493 std::max(std::min(distortion_lag + 10, fs_mult_120),

444 static_cast<size_t>(60 * fs_mult));	494 static_cast<size_t>(60 * fs_mult));

445	495

446 size_t start_index = std::min(distortion_lag, correlation_lag);	496 size_t start_index = std::min(distortion_lag, correlation_lag);

447 size_t correlation_lags = static_cast<size_t>(	497 size_t correlation_lags = static_cast<size_t>(

448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);	498 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);

449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));	499 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

450	500

451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {	501 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {

452 ChannelParameters& parameters = channel_parameters_[channel_ix];	502 ChannelParameters& parameters = channel_parameters_[channel_ix];

453 // Calculate suitable scaling.	503

454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(	504 int correlation_scale;

455 &audio_history[signal_length - correlation_length - start_index

456 - correlation_lags],

457 correlation_length + start_index + correlation_lags - 1);

458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +

459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;

460 correlation_scale = std::max(0, correlation_scale);

461	505

462 // Calculate the correlation, store in |correlation_vector2|.	506 // Calculate the correlation, store in |correlation_vector2|.

463 WebRtcSpl_CrossCorrelation(	507 CrossCorrelation(

464 correlation_vector2,	508 correlation_vector2,

465 &(audio_history[signal_length - correlation_length]),	509 &(audio_history[signal_length - correlation_length]),

466 &(audio_history[signal_length - correlation_length - start_index]),	510 &(audio_history[signal_length - correlation_length - start_index]),

467 correlation_length, correlation_lags, correlation_scale, -1);	511 correlation_length, correlation_lags, &correlation_scale, -1);

468	512

469 // Find maximizing index.	513 // Find maximizing index.

470 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);	514 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);

471 int32_t max_correlation = correlation_vector2[best_index];	515 int32_t max_correlation = correlation_vector2[best_index];

472 // Compensate index with start offset.	516 // Compensate index with start offset.

473 best_index = best_index + start_index;	517 best_index = best_index + start_index;

474	518

475 // Calculate energies.	519 // Calculate energies.

476 int32_t energy1 = WebRtcSpl_DotProductWithScale(	520 int32_t energy1 = WebRtcSpl_DotProductWithScale(

477 &(audio_history[signal_length - correlation_length]),	521 &(audio_history[signal_length - correlation_length]),

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;	619 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;

576 // Third lag is the average again, but rounding towards |correlation_lag|.	620 // Third lag is the average again, but rounding towards |correlation_lag|.

577 if (distortion_lag > correlation_lag) {	621 if (distortion_lag > correlation_lag) {

578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;	622 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;

579 } else {	623 } else {

580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;	624 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;

581 }	625 }

582 }	626 }

583	627

584 // Calculate the LPC and the gain of the filters.	628 // Calculate the LPC and the gain of the filters.

585 // Calculate scale value needed for auto-correlation.

586 correlation_scale = WebRtcSpl_MaxAbsValueW16(

587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]),

588 fs_mult_lpc_analysis_len);

589

590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);

591 correlation_scale = std::max(correlation_scale * 2 + 7, 0);

592	629

593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.	630 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.

594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -	631 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -

595 kUnvoicedLpcOrder;	632 kUnvoicedLpcOrder;

596 // Copy signal to temporary vector to be able to pad with leading zeros.	633 // Copy signal to temporary vector to be able to pad with leading zeros.

597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len	634 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len

598 + kUnvoicedLpcOrder];	635 + kUnvoicedLpcOrder];

599 memset(temp_signal, 0,	636 memset(temp_signal, 0,

600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));	637 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));

601 memcpy(&temp_signal[kUnvoicedLpcOrder],	638 memcpy(&temp_signal[kUnvoicedLpcOrder],

602 &audio_history[temp_index + kUnvoicedLpcOrder],	639 &audio_history[temp_index + kUnvoicedLpcOrder],

603 sizeof(int16_t) * fs_mult_lpc_analysis_len);	640 sizeof(int16_t) * fs_mult_lpc_analysis_len);

604 WebRtcSpl_CrossCorrelation(auto_correlation,	641 CrossCorrelation(auto_correlation,

605 &temp_signal[kUnvoicedLpcOrder],	642 &temp_signal[kUnvoicedLpcOrder],

606 &temp_signal[kUnvoicedLpcOrder],	643 &temp_signal[kUnvoicedLpcOrder],

607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,	644 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,

608 correlation_scale, -1);	645 &correlation_scale, -1);

609 delete [] temp_signal;	646 delete [] temp_signal;

610	647

611 // Verify that variance is positive.	648 // Verify that variance is positive.

612 if (auto_correlation[0] > 0) {	649 if (auto_correlation[0] > 0) {

613 // Estimate AR filter parameters using Levinson-Durbin algorithm;	650 // Estimate AR filter parameters using Levinson-Durbin algorithm;

614 // kUnvoicedLpcOrder + 1 filter coefficients.	651 // kUnvoicedLpcOrder + 1 filter coefficients.

615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,	652 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,

616 parameters.ar_filter,	653 parameters.ar_filter,

617 reflection_coeff,	654 reflection_coeff,

618 kUnvoicedLpcOrder);	655 kUnvoicedLpcOrder);

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
759 voice_mix_factor(0),	796 voice_mix_factor(0),

760 current_voice_mix_factor(0),	797 current_voice_mix_factor(0),

761 onset(false),	798 onset(false),

762 mute_slope(0) {	799 mute_slope(0) {

763 memset(ar_filter, 0, sizeof(ar_filter));	800 memset(ar_filter, 0, sizeof(ar_filter));

764 memset(ar_filter_state, 0, sizeof(ar_filter_state));	801 memset(ar_filter_state, 0, sizeof(ar_filter_state));

765 }	802 }

766	803

767 void Expand::Correlation(const int16_t* input,	804 void Expand::Correlation(const int16_t* input,

768 size_t input_length,	805 size_t input_length,

769 int16_t* output,	806 int16_t* output) const {

770 int* output_scale) const {

771 // Set parameters depending on sample rate.	807 // Set parameters depending on sample rate.

772 const int16_t* filter_coefficients;	808 const int16_t* filter_coefficients;

773 size_t num_coefficients;	809 size_t num_coefficients;

774 int16_t downsampling_factor;	810 int16_t downsampling_factor;

775 if (fs_hz_ == 8000) {	811 if (fs_hz_ == 8000) {

776 num_coefficients = 3;	812 num_coefficients = 3;

777 downsampling_factor = 2;	813 downsampling_factor = 2;

778 filter_coefficients = DspHelper::kDownsample8kHzTbl;	814 filter_coefficients = DspHelper::kDownsample8kHzTbl;

779 } else if (fs_hz_ == 16000) {	815 } else if (fs_hz_ == 16000) {

780 num_coefficients = 5;	816 num_coefficients = 5;

(...skipping 26 matching lines...) Expand all Loading...
807 downsampling_factor, kFilterDelay);	843 downsampling_factor, kFilterDelay);

808	844

809 // Normalize |downsampled_input| to using all 16 bits.	845 // Normalize |downsampled_input| to using all 16 bits.

810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,	846 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,

811 kDownsampledLength);	847 kDownsampledLength);

812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);	848 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);

813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,	849 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,

814 downsampled_input, norm_shift);	850 downsampled_input, norm_shift);

815	851

816 int32_t correlation[kNumCorrelationLags];	852 int32_t correlation[kNumCorrelationLags];

817 static const int kCorrelationShift = 6;	853 CrossCorrelation(

818 WebRtcSpl_CrossCorrelation(

819 correlation,	854 correlation,

820 &downsampled_input[kDownsampledLength - kCorrelationLength],	855 &downsampled_input[kDownsampledLength - kCorrelationLength],

821 &downsampled_input[kDownsampledLength - kCorrelationLength	856 &downsampled_input[kDownsampledLength - kCorrelationLength

822 - kCorrelationStartLag],	857 - kCorrelationStartLag],

823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);	858 kCorrelationLength, kNumCorrelationLags, nullptr, -1);

824	859

825 // Normalize and move data from 32-bit to 16-bit vector.	860 // Normalize and move data from 32-bit to 16-bit vector.

826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,	861 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,

827 kNumCorrelationLags);	862 kNumCorrelationLags);

828 int16_t norm_shift2 = static_cast<int16_t>(	863 int16_t norm_shift2 = static_cast<int16_t>(

829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));	864 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));

830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,	865 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,

831 norm_shift2);	866 norm_shift2);

832 // Total scale factor (right shifts) of correlation value.

833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;

834 }	867 }

835	868

836 void Expand::UpdateLagIndex() {	869 void Expand::UpdateLagIndex() {

837 current_lag_index_ = current_lag_index_ + lag_index_direction_;	870 current_lag_index_ = current_lag_index_ + lag_index_direction_;

838 // Change direction if needed.	871 // Change direction if needed.

839 if (current_lag_index_ <= 0) {	872 if (current_lag_index_ <= 0) {

840 lag_index_direction_ = 1;	873 lag_index_direction_ = 1;

841 }	874 }

842 if (current_lag_index_ >= kNumLags - 1) {	875 if (current_lag_index_ >= kNumLags - 1) {

843 lag_index_direction_ = -1;	876 lag_index_direction_ = -1;

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;	986 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;

954 while (samples_generated < length) {	987 while (samples_generated < length) {

955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);	988 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);

956 random_vector_->IncreaseSeedIncrement(seed_increment);	989 random_vector_->IncreaseSeedIncrement(seed_increment);

957 random_vector_->Generate(rand_length, &random_vector[samples_generated]);	990 random_vector_->Generate(rand_length, &random_vector[samples_generated]);

958 samples_generated += rand_length;	991 samples_generated += rand_length;

959 }	992 }

960 }	993 }

961	994

962 } // namespace webrtc	995 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | no next file » | no next file with comments »