Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(602)

Side by Side Diff: webrtc/modules/audio_coding/neteq/expand.cc

Issue 1925053002: Revert of Avoiding overflow in cross correlation in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | webrtc/modules/audio_coding/neteq/merge.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_coding/neteq/expand.h" 11 #include "webrtc/modules/audio_coding/neteq/expand.h"
12 12
13 #include <assert.h> 13 #include <assert.h>
14 #include <string.h> // memset 14 #include <string.h> // memset
15 15
16 #include <algorithm> // min, max 16 #include <algorithm> // min, max
17 #include <limits> // numeric_limits<T> 17 #include <limits> // numeric_limits<T>
18 18
19 #include "webrtc/base/safe_conversions.h" 19 #include "webrtc/base/safe_conversions.h"
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
21 #include "webrtc/modules/audio_coding/neteq/background_noise.h" 21 #include "webrtc/modules/audio_coding/neteq/background_noise.h"
22 #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
23 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" 22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
24 #include "webrtc/modules/audio_coding/neteq/random_vector.h" 23 #include "webrtc/modules/audio_coding/neteq/random_vector.h"
25 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h" 24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
26 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" 25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
27 26
28 namespace webrtc { 27 namespace webrtc {
29 28
30 Expand::Expand(BackgroundNoise* background_noise, 29 Expand::Expand(BackgroundNoise* background_noise,
31 SyncBuffer* sync_buffer, 30 SyncBuffer* sync_buffer,
32 RandomVector* random_vector, 31 RandomVector* random_vector,
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after
373 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; 372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;
374 373
375 const size_t signal_length = static_cast<size_t>(256 * fs_mult); 374 const size_t signal_length = static_cast<size_t>(256 * fs_mult);
376 const int16_t* audio_history = 375 const int16_t* audio_history =
377 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; 376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];
378 377
379 // Initialize. 378 // Initialize.
380 InitializeForAnExpandPeriod(); 379 InitializeForAnExpandPeriod();
381 380
382 // Calculate correlation in downsampled domain (4 kHz sample rate). 381 // Calculate correlation in downsampled domain (4 kHz sample rate).
382 int correlation_scale;
383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. 383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.
384 // If it is decided to break bit-exactness |correlation_length| should be 384 // If it is decided to break bit-exactness |correlation_length| should be
385 // initialized to the return value of Correlation(). 385 // initialized to the return value of Correlation().
386 Correlation(audio_history, signal_length, correlation_vector); 386 Correlation(audio_history, signal_length, correlation_vector,
387 &correlation_scale);
387 388
388 // Find peaks in correlation vector. 389 // Find peaks in correlation vector.
389 DspHelper::PeakDetection(correlation_vector, correlation_length, 390 DspHelper::PeakDetection(correlation_vector, correlation_length,
390 kNumCorrelationCandidates, fs_mult, 391 kNumCorrelationCandidates, fs_mult,
391 best_correlation_index, best_correlation); 392 best_correlation_index, best_correlation);
392 393
393 // Adjust peak locations; cross-correlation lags start at 2.5 ms 394 // Adjust peak locations; cross-correlation lags start at 2.5 ms
394 // (20 * fs_mult samples). 395 // (20 * fs_mult samples).
395 best_correlation_index[0] += fs_mult_20; 396 best_correlation_index[0] += fs_mult_20;
396 best_correlation_index[1] += fs_mult_20; 397 best_correlation_index[1] += fs_mult_20;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
442 std::max(std::min(distortion_lag + 10, fs_mult_120), 443 std::max(std::min(distortion_lag + 10, fs_mult_120),
443 static_cast<size_t>(60 * fs_mult)); 444 static_cast<size_t>(60 * fs_mult));
444 445
445 size_t start_index = std::min(distortion_lag, correlation_lag); 446 size_t start_index = std::min(distortion_lag, correlation_lag);
446 size_t correlation_lags = static_cast<size_t>( 447 size_t correlation_lags = static_cast<size_t>(
447 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1); 448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);
448 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1)); 449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));
449 450
450 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { 451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
451 ChannelParameters& parameters = channel_parameters_[channel_ix]; 452 ChannelParameters& parameters = channel_parameters_[channel_ix];
453 // Calculate suitable scaling.
454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
455 &audio_history[signal_length - correlation_length - start_index
456 - correlation_lags],
457 correlation_length + start_index + correlation_lags - 1);
458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
460 correlation_scale = std::max(0, correlation_scale);
452 461
453 // Calculate the correlation, store in |correlation_vector2|. 462 // Calculate the correlation, store in |correlation_vector2|.
454 int correlation_scale = CrossCorrelationWithAutoShift( 463 WebRtcSpl_CrossCorrelation(
464 correlation_vector2,
455 &(audio_history[signal_length - correlation_length]), 465 &(audio_history[signal_length - correlation_length]),
456 &(audio_history[signal_length - correlation_length - start_index]), 466 &(audio_history[signal_length - correlation_length - start_index]),
457 correlation_length, correlation_lags, -1, correlation_vector2); 467 correlation_length, correlation_lags, correlation_scale, -1);
458 468
459 // Find maximizing index. 469 // Find maximizing index.
460 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); 470 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
461 int32_t max_correlation = correlation_vector2[best_index]; 471 int32_t max_correlation = correlation_vector2[best_index];
462 // Compensate index with start offset. 472 // Compensate index with start offset.
463 best_index = best_index + start_index; 473 best_index = best_index + start_index;
464 474
465 // Calculate energies. 475 // Calculate energies.
466 int32_t energy1 = WebRtcSpl_DotProductWithScale( 476 int32_t energy1 = WebRtcSpl_DotProductWithScale(
467 &(audio_history[signal_length - correlation_length]), 477 &(audio_history[signal_length - correlation_length]),
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
565 expand_lags_[1] = (distortion_lag + correlation_lag) / 2; 575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
566 // Third lag is the average again, but rounding towards |correlation_lag|. 576 // Third lag is the average again, but rounding towards |correlation_lag|.
567 if (distortion_lag > correlation_lag) { 577 if (distortion_lag > correlation_lag) {
568 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; 578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
569 } else { 579 } else {
570 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; 580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
571 } 581 }
572 } 582 }
573 583
574 // Calculate the LPC and the gain of the filters. 584 // Calculate the LPC and the gain of the filters.
585 // Calculate scale value needed for auto-correlation.
586 correlation_scale = WebRtcSpl_MaxAbsValueW16(
587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]),
588 fs_mult_lpc_analysis_len);
589
590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);
591 correlation_scale = std::max(correlation_scale * 2 + 7, 0);
575 592
576 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. 593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
577 size_t temp_index = signal_length - fs_mult_lpc_analysis_len - 594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len -
578 kUnvoicedLpcOrder; 595 kUnvoicedLpcOrder;
579 // Copy signal to temporary vector to be able to pad with leading zeros. 596 // Copy signal to temporary vector to be able to pad with leading zeros.
580 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len 597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len
581 + kUnvoicedLpcOrder]; 598 + kUnvoicedLpcOrder];
582 memset(temp_signal, 0, 599 memset(temp_signal, 0,
583 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); 600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
584 memcpy(&temp_signal[kUnvoicedLpcOrder], 601 memcpy(&temp_signal[kUnvoicedLpcOrder],
585 &audio_history[temp_index + kUnvoicedLpcOrder], 602 &audio_history[temp_index + kUnvoicedLpcOrder],
586 sizeof(int16_t) * fs_mult_lpc_analysis_len); 603 sizeof(int16_t) * fs_mult_lpc_analysis_len);
587 correlation_scale = CrossCorrelationWithAutoShift( 604 WebRtcSpl_CrossCorrelation(auto_correlation,
588 &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder], 605 &temp_signal[kUnvoicedLpcOrder],
589 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation); 606 &temp_signal[kUnvoicedLpcOrder],
607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,
608 correlation_scale, -1);
590 delete [] temp_signal; 609 delete [] temp_signal;
591 610
592 // Verify that variance is positive. 611 // Verify that variance is positive.
593 if (auto_correlation[0] > 0) { 612 if (auto_correlation[0] > 0) {
594 // Estimate AR filter parameters using Levinson-Durbin algorithm; 613 // Estimate AR filter parameters using Levinson-Durbin algorithm;
595 // kUnvoicedLpcOrder + 1 filter coefficients. 614 // kUnvoicedLpcOrder + 1 filter coefficients.
596 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, 615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,
597 parameters.ar_filter, 616 parameters.ar_filter,
598 reflection_coeff, 617 reflection_coeff,
599 kUnvoicedLpcOrder); 618 kUnvoicedLpcOrder);
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
740 voice_mix_factor(0), 759 voice_mix_factor(0),
741 current_voice_mix_factor(0), 760 current_voice_mix_factor(0),
742 onset(false), 761 onset(false),
743 mute_slope(0) { 762 mute_slope(0) {
744 memset(ar_filter, 0, sizeof(ar_filter)); 763 memset(ar_filter, 0, sizeof(ar_filter));
745 memset(ar_filter_state, 0, sizeof(ar_filter_state)); 764 memset(ar_filter_state, 0, sizeof(ar_filter_state));
746 } 765 }
747 766
748 void Expand::Correlation(const int16_t* input, 767 void Expand::Correlation(const int16_t* input,
749 size_t input_length, 768 size_t input_length,
750 int16_t* output) const { 769 int16_t* output,
770 int* output_scale) const {
751 // Set parameters depending on sample rate. 771 // Set parameters depending on sample rate.
752 const int16_t* filter_coefficients; 772 const int16_t* filter_coefficients;
753 size_t num_coefficients; 773 size_t num_coefficients;
754 int16_t downsampling_factor; 774 int16_t downsampling_factor;
755 if (fs_hz_ == 8000) { 775 if (fs_hz_ == 8000) {
756 num_coefficients = 3; 776 num_coefficients = 3;
757 downsampling_factor = 2; 777 downsampling_factor = 2;
758 filter_coefficients = DspHelper::kDownsample8kHzTbl; 778 filter_coefficients = DspHelper::kDownsample8kHzTbl;
759 } else if (fs_hz_ == 16000) { 779 } else if (fs_hz_ == 16000) {
760 num_coefficients = 5; 780 num_coefficients = 5;
(...skipping 26 matching lines...) Expand all
787 downsampling_factor, kFilterDelay); 807 downsampling_factor, kFilterDelay);
788 808
789 // Normalize |downsampled_input| to using all 16 bits. 809 // Normalize |downsampled_input| to using all 16 bits.
790 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, 810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,
791 kDownsampledLength); 811 kDownsampledLength);
792 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); 812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
793 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, 813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
794 downsampled_input, norm_shift); 814 downsampled_input, norm_shift);
795 815
796 int32_t correlation[kNumCorrelationLags]; 816 int32_t correlation[kNumCorrelationLags];
797 CrossCorrelationWithAutoShift( 817 static const int kCorrelationShift = 6;
818 WebRtcSpl_CrossCorrelation(
819 correlation,
798 &downsampled_input[kDownsampledLength - kCorrelationLength], 820 &downsampled_input[kDownsampledLength - kCorrelationLength],
799 &downsampled_input[kDownsampledLength - kCorrelationLength 821 &downsampled_input[kDownsampledLength - kCorrelationLength
800 - kCorrelationStartLag], 822 - kCorrelationStartLag],
801 kCorrelationLength, kNumCorrelationLags, -1, correlation); 823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);
802 824
803 // Normalize and move data from 32-bit to 16-bit vector. 825 // Normalize and move data from 32-bit to 16-bit vector.
804 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, 826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
805 kNumCorrelationLags); 827 kNumCorrelationLags);
806 int16_t norm_shift2 = static_cast<int16_t>( 828 int16_t norm_shift2 = static_cast<int16_t>(
807 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); 829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
808 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, 830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
809 norm_shift2); 831 norm_shift2);
832 // Total scale factor (right shifts) of correlation value.
833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;
810 } 834 }
811 835
812 void Expand::UpdateLagIndex() { 836 void Expand::UpdateLagIndex() {
813 current_lag_index_ = current_lag_index_ + lag_index_direction_; 837 current_lag_index_ = current_lag_index_ + lag_index_direction_;
814 // Change direction if needed. 838 // Change direction if needed.
815 if (current_lag_index_ <= 0) { 839 if (current_lag_index_ <= 0) {
816 lag_index_direction_ = 1; 840 lag_index_direction_ = 1;
817 } 841 }
818 if (current_lag_index_ >= kNumLags - 1) { 842 if (current_lag_index_ >= kNumLags - 1) {
819 lag_index_direction_ = -1; 843 lag_index_direction_ = -1;
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
929 const size_t kMaxRandSamples = RandomVector::kRandomTableSize; 953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
930 while (samples_generated < length) { 954 while (samples_generated < length) {
931 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); 955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
932 random_vector_->IncreaseSeedIncrement(seed_increment); 956 random_vector_->IncreaseSeedIncrement(seed_increment);
933 random_vector_->Generate(rand_length, &random_vector[samples_generated]); 957 random_vector_->Generate(rand_length, &random_vector[samples_generated]);
934 samples_generated += rand_length; 958 samples_generated += rand_length;
935 } 959 }
936 } 960 }
937 961
938 } // namespace webrtc 962 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_coding/neteq/expand.h ('k') | webrtc/modules/audio_coding/neteq/merge.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698