OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_coding/neteq/expand.h" | 11 #include "webrtc/modules/audio_coding/neteq/expand.h" |
12 | 12 |
13 #include <assert.h> | 13 #include <assert.h> |
14 #include <string.h> // memset | 14 #include <string.h> // memset |
15 | 15 |
16 #include <algorithm> // min, max | 16 #include <algorithm> // min, max |
17 #include <limits> // numeric_limits<T> | 17 #include <limits> // numeric_limits<T> |
18 | 18 |
19 #include "webrtc/base/safe_conversions.h" | 19 #include "webrtc/base/safe_conversions.h" |
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
21 #include "webrtc/modules/audio_coding/neteq/background_noise.h" | 21 #include "webrtc/modules/audio_coding/neteq/background_noise.h" |
22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" | 22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" |
23 #include "webrtc/modules/audio_coding/neteq/random_vector.h" | 23 #include "webrtc/modules/audio_coding/neteq/random_vector.h" |
24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h" | 24 #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h" |
25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" | 25 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" |
26 | 26 |
27 namespace webrtc { | 27 namespace webrtc { |
28 | 28 |
29 namespace { | |
30 | |
31 // This function decides the overflow-protecting scaling and calls | |
32 // WebRtcSpl_CrossCorrelation. | |
33 void CrossCorrelation(int32_t* cross_correlation, | |
34 const int16_t* sequence_1, | |
35 const int16_t* sequence_2, | |
36 size_t sequence_1_length, | |
37 size_t cross_correlation_length, | |
38 int* right_shifts, | |
39 int cross_correlation_step) { | |
40 // Find the maximum absolute value of sequence_1 and 2. | |
41 const int16_t max_1 = WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length); | |
42 const int sequence_2_shift = | |
43 cross_correlation_step * (cross_correlation_length - 1); | |
44 const int16_t* sequence_2_start = | |
45 sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift; | |
46 const size_t sequence_2_length = sequence_1_length + abs(sequence_2_shift); | |
47 const int16_t max_2 = | |
48 WebRtcSpl_MaxAbsValueW16(sequence_2_start, sequence_2_length); | |
49 | |
50 // In order to avoid overflow when computing the sum we should scale the | |
51 // samples so that (sequence_1_length * max_1 * max_2) will not overflow. | |
52 // Expected scaling fulfills | |
53 // 1) sufficient: | |
54 // sequence_1_length * (max_1 * max_2 >> scaling) <= 0x7fffffff; | |
55 // 2) necessary: | |
56 // if (scaling > 0) | |
57 // sequence_1_length * (max_1 * max_2 >> (scaling - 1)) > 0x7fffffff; | |
58 // The following calculation fulfills 1) and almost fulfills 2). | |
59 // There are some corner cases where 2) is not satisfied, e.g., | |
60 // max_1 = 17, max_2 = 30848, sequence_1_length = 4095; in such a case, the | |
61 // optimal scaling is 0, while the following calculation results in 1. | |
62 const int32_t factor = max_1 * max_2 / (std::numeric_limits<int32_t>::max() / | |
hlundin-webrtc
2016/04/22 06:48:56
Even though it is true that A*B/C will be evaluate
minyue-webrtc
2016/04/22 13:58:30
Done.
| |
63 static_cast<int32_t>(sequence_1_length)); | |
64 const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); | |
65 | |
66 assert((double)max_1 * max_2 * sequence_1_length / (1 << scaling) <= | |
67 WEBRTC_SPL_WORD32_MAX); | |
68 assert(scaling == 0 || | |
69 (double)max_1 * max_2 * sequence_1_length /(1 << scaling) * 2 > | |
70 WEBRTC_SPL_WORD32_MAX); | |
71 | |
72 WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2, | |
73 sequence_1_length, cross_correlation_length, | |
74 scaling, cross_correlation_step); | |
75 if (right_shifts) | |
76 *right_shifts = scaling; | |
77 } | |
78 | |
79 } // namespace | |
80 | |
29 Expand::Expand(BackgroundNoise* background_noise, | 81 Expand::Expand(BackgroundNoise* background_noise, |
30 SyncBuffer* sync_buffer, | 82 SyncBuffer* sync_buffer, |
31 RandomVector* random_vector, | 83 RandomVector* random_vector, |
32 StatisticsCalculator* statistics, | 84 StatisticsCalculator* statistics, |
33 int fs, | 85 int fs, |
34 size_t num_channels) | 86 size_t num_channels) |
35 : random_vector_(random_vector), | 87 : random_vector_(random_vector), |
36 sync_buffer_(sync_buffer), | 88 sync_buffer_(sync_buffer), |
37 first_expand_(true), | 89 first_expand_(true), |
38 fs_hz_(fs), | 90 fs_hz_(fs), |
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
372 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; | 424 size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; |
373 | 425 |
374 const size_t signal_length = static_cast<size_t>(256 * fs_mult); | 426 const size_t signal_length = static_cast<size_t>(256 * fs_mult); |
375 const int16_t* audio_history = | 427 const int16_t* audio_history = |
376 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; | 428 &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; |
377 | 429 |
378 // Initialize. | 430 // Initialize. |
379 InitializeForAnExpandPeriod(); | 431 InitializeForAnExpandPeriod(); |
380 | 432 |
381 // Calculate correlation in downsampled domain (4 kHz sample rate). | 433 // Calculate correlation in downsampled domain (4 kHz sample rate). |
382 int correlation_scale; | |
383 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. | 434 size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. |
384 // If it is decided to break bit-exactness |correlation_length| should be | 435 // If it is decided to break bit-exactness |correlation_length| should be |
385 // initialized to the return value of Correlation(). | 436 // initialized to the return value of Correlation(). |
386 Correlation(audio_history, signal_length, correlation_vector, | 437 Correlation(audio_history, signal_length, correlation_vector); |
387 &correlation_scale); | |
388 | 438 |
389 // Find peaks in correlation vector. | 439 // Find peaks in correlation vector. |
390 DspHelper::PeakDetection(correlation_vector, correlation_length, | 440 DspHelper::PeakDetection(correlation_vector, correlation_length, |
391 kNumCorrelationCandidates, fs_mult, | 441 kNumCorrelationCandidates, fs_mult, |
392 best_correlation_index, best_correlation); | 442 best_correlation_index, best_correlation); |
393 | 443 |
394 // Adjust peak locations; cross-correlation lags start at 2.5 ms | 444 // Adjust peak locations; cross-correlation lags start at 2.5 ms |
395 // (20 * fs_mult samples). | 445 // (20 * fs_mult samples). |
396 best_correlation_index[0] += fs_mult_20; | 446 best_correlation_index[0] += fs_mult_20; |
397 best_correlation_index[1] += fs_mult_20; | 447 best_correlation_index[1] += fs_mult_20; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
443 std::max(std::min(distortion_lag + 10, fs_mult_120), | 493 std::max(std::min(distortion_lag + 10, fs_mult_120), |
444 static_cast<size_t>(60 * fs_mult)); | 494 static_cast<size_t>(60 * fs_mult)); |
445 | 495 |
446 size_t start_index = std::min(distortion_lag, correlation_lag); | 496 size_t start_index = std::min(distortion_lag, correlation_lag); |
447 size_t correlation_lags = static_cast<size_t>( | 497 size_t correlation_lags = static_cast<size_t>( |
448 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1); | 498 WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1); |
449 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1)); | 499 assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1)); |
450 | 500 |
451 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { | 501 for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { |
452 ChannelParameters& parameters = channel_parameters_[channel_ix]; | 502 ChannelParameters& parameters = channel_parameters_[channel_ix]; |
453 // Calculate suitable scaling. | 503 |
454 int16_t signal_max = WebRtcSpl_MaxAbsValueW16( | 504 int correlation_scale; |
455 &audio_history[signal_length - correlation_length - start_index | |
456 - correlation_lags], | |
457 correlation_length + start_index + correlation_lags - 1); | |
458 correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) + | |
459 (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31; | |
460 correlation_scale = std::max(0, correlation_scale); | |
461 | 505 |
462 // Calculate the correlation, store in |correlation_vector2|. | 506 // Calculate the correlation, store in |correlation_vector2|. |
463 WebRtcSpl_CrossCorrelation( | 507 CrossCorrelation( |
464 correlation_vector2, | 508 correlation_vector2, |
465 &(audio_history[signal_length - correlation_length]), | 509 &(audio_history[signal_length - correlation_length]), |
466 &(audio_history[signal_length - correlation_length - start_index]), | 510 &(audio_history[signal_length - correlation_length - start_index]), |
467 correlation_length, correlation_lags, correlation_scale, -1); | 511 correlation_length, correlation_lags, &correlation_scale, -1); |
468 | 512 |
469 // Find maximizing index. | 513 // Find maximizing index. |
470 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); | 514 best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); |
471 int32_t max_correlation = correlation_vector2[best_index]; | 515 int32_t max_correlation = correlation_vector2[best_index]; |
472 // Compensate index with start offset. | 516 // Compensate index with start offset. |
473 best_index = best_index + start_index; | 517 best_index = best_index + start_index; |
474 | 518 |
475 // Calculate energies. | 519 // Calculate energies. |
476 int32_t energy1 = WebRtcSpl_DotProductWithScale( | 520 int32_t energy1 = WebRtcSpl_DotProductWithScale( |
477 &(audio_history[signal_length - correlation_length]), | 521 &(audio_history[signal_length - correlation_length]), |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
575 expand_lags_[1] = (distortion_lag + correlation_lag) / 2; | 619 expand_lags_[1] = (distortion_lag + correlation_lag) / 2; |
576 // Third lag is the average again, but rounding towards |correlation_lag|. | 620 // Third lag is the average again, but rounding towards |correlation_lag|. |
577 if (distortion_lag > correlation_lag) { | 621 if (distortion_lag > correlation_lag) { |
578 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; | 622 expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2; |
579 } else { | 623 } else { |
580 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; | 624 expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2; |
581 } | 625 } |
582 } | 626 } |
583 | 627 |
584 // Calculate the LPC and the gain of the filters. | 628 // Calculate the LPC and the gain of the filters. |
585 // Calculate scale value needed for auto-correlation. | |
586 correlation_scale = WebRtcSpl_MaxAbsValueW16( | |
587 &(audio_history[signal_length - fs_mult_lpc_analysis_len]), | |
588 fs_mult_lpc_analysis_len); | |
589 | |
590 correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0); | |
591 correlation_scale = std::max(correlation_scale * 2 + 7, 0); | |
592 | 629 |
593 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. | 630 // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. |
594 size_t temp_index = signal_length - fs_mult_lpc_analysis_len - | 631 size_t temp_index = signal_length - fs_mult_lpc_analysis_len - |
595 kUnvoicedLpcOrder; | 632 kUnvoicedLpcOrder; |
596 // Copy signal to temporary vector to be able to pad with leading zeros. | 633 // Copy signal to temporary vector to be able to pad with leading zeros. |
597 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len | 634 int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len |
598 + kUnvoicedLpcOrder]; | 635 + kUnvoicedLpcOrder]; |
599 memset(temp_signal, 0, | 636 memset(temp_signal, 0, |
600 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); | 637 sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); |
601 memcpy(&temp_signal[kUnvoicedLpcOrder], | 638 memcpy(&temp_signal[kUnvoicedLpcOrder], |
602 &audio_history[temp_index + kUnvoicedLpcOrder], | 639 &audio_history[temp_index + kUnvoicedLpcOrder], |
603 sizeof(int16_t) * fs_mult_lpc_analysis_len); | 640 sizeof(int16_t) * fs_mult_lpc_analysis_len); |
604 WebRtcSpl_CrossCorrelation(auto_correlation, | 641 CrossCorrelation(auto_correlation, |
605 &temp_signal[kUnvoicedLpcOrder], | 642 &temp_signal[kUnvoicedLpcOrder], |
606 &temp_signal[kUnvoicedLpcOrder], | 643 &temp_signal[kUnvoicedLpcOrder], |
607 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, | 644 fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, |
608 correlation_scale, -1); | 645 &correlation_scale, -1); |
609 delete [] temp_signal; | 646 delete [] temp_signal; |
610 | 647 |
611 // Verify that variance is positive. | 648 // Verify that variance is positive. |
612 if (auto_correlation[0] > 0) { | 649 if (auto_correlation[0] > 0) { |
613 // Estimate AR filter parameters using Levinson-Durbin algorithm; | 650 // Estimate AR filter parameters using Levinson-Durbin algorithm; |
614 // kUnvoicedLpcOrder + 1 filter coefficients. | 651 // kUnvoicedLpcOrder + 1 filter coefficients. |
615 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, | 652 int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, |
616 parameters.ar_filter, | 653 parameters.ar_filter, |
617 reflection_coeff, | 654 reflection_coeff, |
618 kUnvoicedLpcOrder); | 655 kUnvoicedLpcOrder); |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
759 voice_mix_factor(0), | 796 voice_mix_factor(0), |
760 current_voice_mix_factor(0), | 797 current_voice_mix_factor(0), |
761 onset(false), | 798 onset(false), |
762 mute_slope(0) { | 799 mute_slope(0) { |
763 memset(ar_filter, 0, sizeof(ar_filter)); | 800 memset(ar_filter, 0, sizeof(ar_filter)); |
764 memset(ar_filter_state, 0, sizeof(ar_filter_state)); | 801 memset(ar_filter_state, 0, sizeof(ar_filter_state)); |
765 } | 802 } |
766 | 803 |
767 void Expand::Correlation(const int16_t* input, | 804 void Expand::Correlation(const int16_t* input, |
768 size_t input_length, | 805 size_t input_length, |
769 int16_t* output, | 806 int16_t* output) const { |
770 int* output_scale) const { | |
771 // Set parameters depending on sample rate. | 807 // Set parameters depending on sample rate. |
772 const int16_t* filter_coefficients; | 808 const int16_t* filter_coefficients; |
773 size_t num_coefficients; | 809 size_t num_coefficients; |
774 int16_t downsampling_factor; | 810 int16_t downsampling_factor; |
775 if (fs_hz_ == 8000) { | 811 if (fs_hz_ == 8000) { |
776 num_coefficients = 3; | 812 num_coefficients = 3; |
777 downsampling_factor = 2; | 813 downsampling_factor = 2; |
778 filter_coefficients = DspHelper::kDownsample8kHzTbl; | 814 filter_coefficients = DspHelper::kDownsample8kHzTbl; |
779 } else if (fs_hz_ == 16000) { | 815 } else if (fs_hz_ == 16000) { |
780 num_coefficients = 5; | 816 num_coefficients = 5; |
(...skipping 26 matching lines...) Expand all Loading... | |
807 downsampling_factor, kFilterDelay); | 843 downsampling_factor, kFilterDelay); |
808 | 844 |
809 // Normalize |downsampled_input| to use all 16 bits. | 845 // Normalize |downsampled_input| to use all 16 bits. |
810 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, | 846 int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, |
811 kDownsampledLength); | 847 kDownsampledLength); |
812 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); | 848 int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); |
813 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, | 849 WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, |
814 downsampled_input, norm_shift); | 850 downsampled_input, norm_shift); |
815 | 851 |
816 int32_t correlation[kNumCorrelationLags]; | 852 int32_t correlation[kNumCorrelationLags]; |
817 static const int kCorrelationShift = 6; | 853 CrossCorrelation( |
818 WebRtcSpl_CrossCorrelation( | |
819 correlation, | 854 correlation, |
820 &downsampled_input[kDownsampledLength - kCorrelationLength], | 855 &downsampled_input[kDownsampledLength - kCorrelationLength], |
821 &downsampled_input[kDownsampledLength - kCorrelationLength | 856 &downsampled_input[kDownsampledLength - kCorrelationLength |
822 - kCorrelationStartLag], | 857 - kCorrelationStartLag], |
823 kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1); | 858 kCorrelationLength, kNumCorrelationLags, nullptr, -1); |
824 | 859 |
825 // Normalize and move data from 32-bit to 16-bit vector. | 860 // Normalize and move data from 32-bit to 16-bit vector. |
826 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, | 861 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, |
827 kNumCorrelationLags); | 862 kNumCorrelationLags); |
828 int16_t norm_shift2 = static_cast<int16_t>( | 863 int16_t norm_shift2 = static_cast<int16_t>( |
829 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); | 864 std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); |
830 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, | 865 WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, |
831 norm_shift2); | 866 norm_shift2); |
832 // Total scale factor (right shifts) of correlation value. | |
833 *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2; | |
834 } | 867 } |
835 | 868 |
836 void Expand::UpdateLagIndex() { | 869 void Expand::UpdateLagIndex() { |
837 current_lag_index_ = current_lag_index_ + lag_index_direction_; | 870 current_lag_index_ = current_lag_index_ + lag_index_direction_; |
838 // Change direction if needed. | 871 // Change direction if needed. |
839 if (current_lag_index_ <= 0) { | 872 if (current_lag_index_ <= 0) { |
840 lag_index_direction_ = 1; | 873 lag_index_direction_ = 1; |
841 } | 874 } |
842 if (current_lag_index_ >= kNumLags - 1) { | 875 if (current_lag_index_ >= kNumLags - 1) { |
843 lag_index_direction_ = -1; | 876 lag_index_direction_ = -1; |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
953 const size_t kMaxRandSamples = RandomVector::kRandomTableSize; | 986 const size_t kMaxRandSamples = RandomVector::kRandomTableSize; |
954 while (samples_generated < length) { | 987 while (samples_generated < length) { |
955 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); | 988 size_t rand_length = std::min(length - samples_generated, kMaxRandSamples); |
956 random_vector_->IncreaseSeedIncrement(seed_increment); | 989 random_vector_->IncreaseSeedIncrement(seed_increment); |
957 random_vector_->Generate(rand_length, &random_vector[samples_generated]); | 990 random_vector_->Generate(rand_length, &random_vector[samples_generated]); |
958 samples_generated += rand_length; | 991 samples_generated += rand_length; |
959 } | 992 } |
960 } | 993 } |
961 | 994 |
962 } // namespace webrtc | 995 } // namespace webrtc |
OLD | NEW |