webrtc/modules/audio_coding/neteq/expand.cc - Issue 1925053002: Revert of Avoiding overflow in cross correlation in NetEq.

Side by Side Diff: webrtc/modules/audio_coding/neteq/expand.cc

Issue 1925053002: Revert of Avoiding overflow in cross correlation in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/expand.h"	11 #include "webrtc/modules/audio_coding/neteq/expand.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <string.h> // memset	14 #include <string.h> // memset

15	15

16 #include <algorithm> // min, max	16 #include <algorithm> // min, max

17 #include <limits> // numeric_limits<T>	17 #include <limits> // numeric_limits<T>

18	18

19 #include "webrtc/base/safe_conversions.h"	19 #include "webrtc/base/safe_conversions.h"

20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"	21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

22 #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"

23 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

24 #include "webrtc/modules/audio_coding/neteq/random_vector.h"	23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"

25 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"	24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"

26 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"	25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"

27	26

28 namespace webrtc {	27 namespace webrtc {

29	28

30 Expand::Expand(BackgroundNoise* background_noise,	29 Expand::Expand(BackgroundNoise* background_noise,

31 SyncBuffer* sync_buffer,	30 SyncBuffer* sync_buffer,

32 RandomVector* random_vector,	31 RandomVector* random_vector,

(...skipping 340 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
373 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;	372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

374	373

375 const size_t signal_length = static_cast<size_t>(256 * fs_mult);	374 const size_t signal_length = static_cast<size_t>(256 * fs_mult);

376 const int16_t* audio_history =	375 const int16_t* audio_history =

377 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];	376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];

378	377

379 // Initialize.	378 // Initialize.

380 InitializeForAnExpandPeriod();	379 InitializeForAnExpandPeriod();

381	380

382 // Calculate correlation in downsampled domain (4 kHz sample rate).	381 // Calculate correlation in downsampled domain (4 kHz sample rate).

	382 int correlation_scale;

383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.	383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.

384 // If it is decided to break bit-exactness |correlation_length| should be	384 // If it is decided to break bit-exactness |correlation_length| should be

385 // initialized to the return value of Correlation().	385 // initialized to the return value of Correlation().

386 Correlation(audio_history, signal_length, correlation_vector);	386 Correlation(audio_history, signal_length, correlation_vector,

	387 &correlation_scale);

387	388

388 // Find peaks in correlation vector.	389 // Find peaks in correlation vector.

389 DspHelper::PeakDetection(correlation_vector, correlation_length,	390 DspHelper::PeakDetection(correlation_vector, correlation_length,

390 kNumCorrelationCandidates, fs_mult,	391 kNumCorrelationCandidates, fs_mult,

391 best_correlation_index, best_correlation);	392 best_correlation_index, best_correlation);

392	393

393 // Adjust peak locations; cross-correlation lags start at 2.5 ms	394 // Adjust peak locations; cross-correlation lags start at 2.5 ms

394 // (20 * fs_mult samples).	395 // (20 * fs_mult samples).

395 best_correlation_index[0] += fs_mult_20;	396 best_correlation_index[0] += fs_mult_20;

396 best_correlation_index[1] += fs_mult_20;	397 best_correlation_index[1] += fs_mult_20;

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
442 std::max(std::min(distortion_lag + 10, fs_mult_120),	443 std::max(std::min(distortion_lag + 10, fs_mult_120),

443 static_cast<size_t>(60 * fs_mult));	444 static_cast<size_t>(60 * fs_mult));

444	445

445 size_t start_index = std::min(distortion_lag, correlation_lag);	446 size_t start_index = std::min(distortion_lag, correlation_lag);

446 size_t correlation_lags = static_cast<size_t>(	447 size_t correlation_lags = static_cast<size_t>(

447 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);	448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);

448 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));	449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

449	450

450 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {	451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {

451 ChannelParameters& parameters = channel_parameters_[channel_ix];	452 ChannelParameters& parameters = channel_parameters_[channel_ix];

	453 // Calculate suitable scaling.

	454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(

	455 &audio_history[signal_length - correlation_length - start_index

	456 - correlation_lags],

	457 correlation_length + start_index + correlation_lags - 1);

	458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +

	459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;

	460 correlation_scale = std::max(0, correlation_scale);

452	461

453 // Calculate the correlation, store in |correlation_vector2|.	462 // Calculate the correlation, store in |correlation_vector2|.

454 int correlation_scale = CrossCorrelationWithAutoShift(	463 WebRtcSpl_CrossCorrelation(

	464 correlation_vector2,

455 &(audio_history[signal_length - correlation_length]),	465 &(audio_history[signal_length - correlation_length]),

456 &(audio_history[signal_length - correlation_length - start_index]),	466 &(audio_history[signal_length - correlation_length - start_index]),

457 correlation_length, correlation_lags, -1, correlation_vector2);	467 correlation_length, correlation_lags, correlation_scale, -1);

458	468

459 // Find maximizing index.	469 // Find maximizing index.

460 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);	470 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);

461 int32_t max_correlation = correlation_vector2[best_index];	471 int32_t max_correlation = correlation_vector2[best_index];

462 // Compensate index with start offset.	472 // Compensate index with start offset.

463 best_index = best_index + start_index;	473 best_index = best_index + start_index;

464	474

465 // Calculate energies.	475 // Calculate energies.

466 int32_t energy1 = WebRtcSpl_DotProductWithScale(	476 int32_t energy1 = WebRtcSpl_DotProductWithScale(

467 &(audio_history[signal_length - correlation_length]),	477 &(audio_history[signal_length - correlation_length]),

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
565 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;	575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;

566 // Third lag is the average again, but rounding towards |correlation_lag|.	576 // Third lag is the average again, but rounding towards |correlation_lag|.

567 if (distortion_lag > correlation_lag) {	577 if (distortion_lag > correlation_lag) {

568 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;	578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;

569 } else {	579 } else {

570 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;	580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;

571 }	581 }

572 }	582 }

573	583

574 // Calculate the LPC and the gain of the filters.	584 // Calculate the LPC and the gain of the filters.

	585 // Calculate scale value needed for auto-correlation.

	586 correlation_scale = WebRtcSpl_MaxAbsValueW16(

	587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]),

	588 fs_mult_lpc_analysis_len);

	589

	590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);

	591 correlation_scale = std::max(correlation_scale * 2 + 7, 0);

575	592

576 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.	593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.

577 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -	594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -

578 kUnvoicedLpcOrder;	595 kUnvoicedLpcOrder;

579 // Copy signal to temporary vector to be able to pad with leading zeros.	596 // Copy signal to temporary vector to be able to pad with leading zeros.

580 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len	597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len

581 + kUnvoicedLpcOrder];	598 + kUnvoicedLpcOrder];

582 memset(temp_signal, 0,	599 memset(temp_signal, 0,

583 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));	600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));

584 memcpy(&temp_signal[kUnvoicedLpcOrder],	601 memcpy(&temp_signal[kUnvoicedLpcOrder],

585 &audio_history[temp_index + kUnvoicedLpcOrder],	602 &audio_history[temp_index + kUnvoicedLpcOrder],

586 sizeof(int16_t) * fs_mult_lpc_analysis_len);	603 sizeof(int16_t) * fs_mult_lpc_analysis_len);

587 correlation_scale = CrossCorrelationWithAutoShift(	604 WebRtcSpl_CrossCorrelation(auto_correlation,

588 &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],	605 &temp_signal[kUnvoicedLpcOrder],

589 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);	606 &temp_signal[kUnvoicedLpcOrder],

	607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,

	608 correlation_scale, -1);

590 delete [] temp_signal;	609 delete [] temp_signal;

591	610

592 // Verify that variance is positive.	611 // Verify that variance is positive.

593 if (auto_correlation[0] > 0) {	612 if (auto_correlation[0] > 0) {

594 // Estimate AR filter parameters using Levinson-Durbin algorithm;	613 // Estimate AR filter parameters using Levinson-Durbin algorithm;

595 // kUnvoicedLpcOrder + 1 filter coefficients.	614 // kUnvoicedLpcOrder + 1 filter coefficients.

596 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,	615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,

597 parameters.ar_filter,	616 parameters.ar_filter,

598 reflection_coeff,	617 reflection_coeff,

599 kUnvoicedLpcOrder);	618 kUnvoicedLpcOrder);

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
740 voice_mix_factor(0),	759 voice_mix_factor(0),

741 current_voice_mix_factor(0),	760 current_voice_mix_factor(0),

742 onset(false),	761 onset(false),

743 mute_slope(0) {	762 mute_slope(0) {

744 memset(ar_filter, 0, sizeof(ar_filter));	763 memset(ar_filter, 0, sizeof(ar_filter));

745 memset(ar_filter_state, 0, sizeof(ar_filter_state));	764 memset(ar_filter_state, 0, sizeof(ar_filter_state));

746 }	765 }

747	766

748 void Expand::Correlation(const int16_t* input,	767 void Expand::Correlation(const int16_t* input,

749 size_t input_length,	768 size_t input_length,

750 int16_t* output) const {	769 int16_t* output,

	770 int* output_scale) const {

751 // Set parameters depending on sample rate.	771 // Set parameters depending on sample rate.

752 const int16_t* filter_coefficients;	772 const int16_t* filter_coefficients;

753 size_t num_coefficients;	773 size_t num_coefficients;

754 int16_t downsampling_factor;	774 int16_t downsampling_factor;

755 if (fs_hz_ == 8000) {	775 if (fs_hz_ == 8000) {

756 num_coefficients = 3;	776 num_coefficients = 3;

757 downsampling_factor = 2;	777 downsampling_factor = 2;

758 filter_coefficients = DspHelper::kDownsample8kHzTbl;	778 filter_coefficients = DspHelper::kDownsample8kHzTbl;

759 } else if (fs_hz_ == 16000) {	779 } else if (fs_hz_ == 16000) {

760 num_coefficients = 5;	780 num_coefficients = 5;

(...skipping 26 matching lines...) Expand all Loading...
787 downsampling_factor, kFilterDelay);	807 downsampling_factor, kFilterDelay);

788	808

789 // Normalize |downsampled_input| to using all 16 bits.	809 // Normalize |downsampled_input| to using all 16 bits.

790 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,	810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,

791 kDownsampledLength);	811 kDownsampledLength);

792 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);	812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);

793 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,	813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,

794 downsampled_input, norm_shift);	814 downsampled_input, norm_shift);

795	815

796 int32_t correlation[kNumCorrelationLags];	816 int32_t correlation[kNumCorrelationLags];

797 CrossCorrelationWithAutoShift(	817 static const int kCorrelationShift = 6;

	818 WebRtcSpl_CrossCorrelation(

	819 correlation,

798 &downsampled_input[kDownsampledLength - kCorrelationLength],	820 &downsampled_input[kDownsampledLength - kCorrelationLength],

799 &downsampled_input[kDownsampledLength - kCorrelationLength	821 &downsampled_input[kDownsampledLength - kCorrelationLength

800 - kCorrelationStartLag],	822 - kCorrelationStartLag],

801 kCorrelationLength, kNumCorrelationLags, -1, correlation);	823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);

802	824

803 // Normalize and move data from 32-bit to 16-bit vector.	825 // Normalize and move data from 32-bit to 16-bit vector.

804 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,	826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,

805 kNumCorrelationLags);	827 kNumCorrelationLags);

806 int16_t norm_shift2 = static_cast<int16_t>(	828 int16_t norm_shift2 = static_cast<int16_t>(

807 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));	829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));

808 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,	830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,

809 norm_shift2);	831 norm_shift2);

	832 // Total scale factor (right shifts) of correlation value.

	833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;

810 }	834 }

811	835

812 void Expand::UpdateLagIndex() {	836 void Expand::UpdateLagIndex() {

813 current_lag_index_ = current_lag_index_ + lag_index_direction_;	837 current_lag_index_ = current_lag_index_ + lag_index_direction_;

814 // Change direction if needed.	838 // Change direction if needed.

815 if (current_lag_index_ <= 0) {	839 if (current_lag_index_ <= 0) {

816 lag_index_direction_ = 1;	840 lag_index_direction_ = 1;

817 }	841 }

818 if (current_lag_index_ >= kNumLags - 1) {	842 if (current_lag_index_ >= kNumLags - 1) {

819 lag_index_direction_ = -1;	843 lag_index_direction_ = -1;

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
929 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;	953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;

930 while (samples_generated < length) {	954 while (samples_generated < length) {

931 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);	955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);

932 random_vector_->IncreaseSeedIncrement(seed_increment);	956 random_vector_->IncreaseSeedIncrement(seed_increment);

933 random_vector_->Generate(rand_length, &random_vector[samples_generated]);	957 random_vector_->Generate(rand_length, &random_vector[samples_generated]);

934 samples_generated += rand_length;	958 samples_generated += rand_length;

935 }	959 }

936 }	960 }

937	961

938 } // namespace webrtc	962 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | webrtc/modules/audio_coding/neteq/merge.h » ('j') | no next file with comments »