Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h" | 11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h" |
| 12 | 12 |
| 13 #include <algorithm> // min, max | 13 #include <algorithm> // min, max |
| 14 | 14 |
| 15 #include "webrtc/base/scoped_ptr.h" | 15 #include "webrtc/base/scoped_ptr.h" |
| 16 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 16 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 17 #include "webrtc/modules/audio_coding/neteq/background_noise.h" | 17 #include "webrtc/modules/audio_coding/neteq/background_noise.h" |
| 18 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" | 18 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" |
| 19 | 19 |
| 20 namespace webrtc { | 20 namespace webrtc { |
| 21 | 21 |
| 22 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input, | 22 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input, |
| 23 size_t input_len, | 23 size_t input_len, |
| 24 bool fast_mode, | 24 bool fast_mode, |
| 25 AudioMultiVector* output, | 25 AudioMultiVector* output, |
| 26 int16_t* length_change_samples) { | 26 size_t* length_change_samples) { |
| 27 // Pre-calculate common multiplication with |fs_mult_|. | 27 // Pre-calculate common multiplication with |fs_mult_|. |
| 28 int fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms. | 28 size_t fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms. |
|
hlundin-webrtc
2015/08/10 11:30:02
int
| |
| 29 | 29 |
| 30 const int16_t* signal; | 30 const int16_t* signal; |
| 31 rtc::scoped_ptr<int16_t[]> signal_array; | 31 rtc::scoped_ptr<int16_t[]> signal_array; |
| 32 size_t signal_len; | 32 size_t signal_len; |
| 33 if (num_channels_ == 1) { | 33 if (num_channels_ == 1) { |
| 34 signal = input; | 34 signal = input; |
| 35 signal_len = input_len; | 35 signal_len = input_len; |
| 36 } else { | 36 } else { |
| 37 // We want |signal| to be only the first channel of |input|, which is | 37 // We want |signal| to be only the first channel of |input|, which is |
| 38 // interleaved. Thus, we take the first sample, skip forward |num_channels| | 38 // interleaved. Thus, we take the first sample, skip forward |num_channels| |
| 39 // samples, and continue like that. | 39 // samples, and continue like that. |
| 40 signal_len = input_len / num_channels_; | 40 signal_len = input_len / num_channels_; |
| 41 signal_array.reset(new int16_t[signal_len]); | 41 signal_array.reset(new int16_t[signal_len]); |
| 42 signal = signal_array.get(); | 42 signal = signal_array.get(); |
| 43 size_t j = master_channel_; | 43 size_t j = master_channel_; |
| 44 for (size_t i = 0; i < signal_len; ++i) { | 44 for (size_t i = 0; i < signal_len; ++i) { |
| 45 signal_array[i] = input[j]; | 45 signal_array[i] = input[j]; |
| 46 j += num_channels_; | 46 j += num_channels_; |
| 47 } | 47 } |
| 48 } | 48 } |
| 49 | 49 |
| 50 // Find maximum absolute value of input signal. | 50 // Find maximum absolute value of input signal. |
| 51 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, | 51 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len); |
| 52 static_cast<int>(signal_len)); | |
| 53 | 52 |
| 54 // Downsample to 4 kHz sample rate and calculate auto-correlation. | 53 // Downsample to 4 kHz sample rate and calculate auto-correlation. |
| 55 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen, | 54 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen, |
| 56 sample_rate_hz_, true /* compensate delay*/, | 55 sample_rate_hz_, true /* compensate delay*/, |
| 57 downsampled_input_); | 56 downsampled_input_); |
| 58 AutoCorrelation(); | 57 AutoCorrelation(); |
| 59 | 58 |
| 60 // Find the strongest correlation peak. | 59 // Find the strongest correlation peak. |
| 61 static const int kNumPeaks = 1; | 60 static const size_t kNumPeaks = 1; |
| 62 int peak_index; | 61 size_t peak_index; |
| 63 int16_t peak_value; | 62 int16_t peak_value; |
| 64 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks, | 63 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks, |
| 65 fs_mult_, &peak_index, &peak_value); | 64 fs_mult_, &peak_index, &peak_value); |
| 66 // Assert that |peak_index| stays within boundaries. | 65 // Assert that |peak_index| stays within boundaries. |
| 67 assert(peak_index >= 0); | |
| 68 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_); | 66 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_); |
| 69 | 67 |
| 70 // Compensate peak_index for displaced starting position. The displacement | 68 // Compensate peak_index for displaced starting position. The displacement |
| 71 // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz | 69 // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz |
| 72 // domain, while the |peak_index| is in the original sample rate; hence, the | 70 // domain, while the |peak_index| is in the original sample rate; hence, the |
| 73 // multiplication by fs_mult_ * 2. | 71 // multiplication by fs_mult_ * 2. |
| 74 peak_index += kMinLag * fs_mult_ * 2; | 72 peak_index += kMinLag * fs_mult_ * 2; |
| 75 // Assert that |peak_index| stays within boundaries. | 73 // Assert that |peak_index| stays within boundaries. |
| 76 assert(peak_index >= 20 * fs_mult_); | 74 assert(peak_index >= 20 * fs_mult_); |
| 77 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_); | 75 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_); |
| 78 | 76 |
| 79 // Calculate scaling to ensure that |peak_index| samples can be square-summed | 77 // Calculate scaling to ensure that |peak_index| samples can be square-summed |
| 80 // without overflowing. | 78 // without overflowing. |
| 81 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) - | 79 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) - |
| 82 WebRtcSpl_NormW32(peak_index); | 80 WebRtcSpl_NormW32(static_cast<int32_t>(peak_index)); |
|
hlundin-webrtc
2015/08/10 11:30:02
rtc::checked_cast
Peter Kasting
2015/08/17 22:49:47
This shouldn't be necessary, as the asserts above
hlundin-webrtc
2015/08/18 07:19:18
Acknowledged.
| |
| 83 scaling = std::max(0, scaling); | 81 scaling = std::max(0, scaling); |
| 84 | 82 |
| 85 // |vec1| starts at 15 ms minus one pitch period. | 83 // |vec1| starts at 15 ms minus one pitch period. |
| 86 const int16_t* vec1 = &signal[fs_mult_120 - peak_index]; | 84 const int16_t* vec1 = &signal[fs_mult_120 - peak_index]; |
| 87 // |vec2| start at 15 ms. | 85 // |vec2| start at 15 ms. |
| 88 const int16_t* vec2 = &signal[fs_mult_120]; | 86 const int16_t* vec2 = &signal[fs_mult_120]; |
| 89 // Calculate energies for |vec1| and |vec2|, assuming they both contain | 87 // Calculate energies for |vec1| and |vec2|, assuming they both contain |
| 90 // |peak_index| samples. | 88 // |peak_index| samples. |
| 91 int32_t vec1_energy = | 89 int32_t vec1_energy = |
| 92 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling); | 90 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling); |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 170 kCorrelationLen, kMaxLag - kMinLag, scaling, -1); | 168 kCorrelationLen, kMaxLag - kMinLag, scaling, -1); |
| 171 | 169 |
| 172 // Normalize correlation to 14 bits and write to |auto_correlation_|. | 170 // Normalize correlation to 14 bits and write to |auto_correlation_|. |
| 173 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); | 171 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); |
| 174 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); | 172 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); |
| 175 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, | 173 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, |
| 176 auto_corr, scaling); | 174 auto_corr, scaling); |
| 177 } | 175 } |
| 178 | 176 |
| 179 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, | 177 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, |
| 180 int peak_index, int scaling) const { | 178 size_t peak_index, int scaling) const { |
| 181 // Check if the signal seems to be active speech or not (simple VAD). | 179 // Check if the signal seems to be active speech or not (simple VAD). |
| 182 // If (vec1_energy + vec2_energy) / (2 * peak_index) <= | 180 // If (vec1_energy + vec2_energy) / (2 * peak_index) <= |
| 183 // 8 * background_noise_energy, then we say that the signal contains no | 181 // 8 * background_noise_energy, then we say that the signal contains no |
| 184 // active speech. | 182 // active speech. |
| 185 // Rewrite the inequality as: | 183 // Rewrite the inequality as: |
| 186 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy. | 184 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy. |
| 187 // The two sides of the inequality will be denoted |left_side| and | 185 // The two sides of the inequality will be denoted |left_side| and |
| 188 // |right_side|. | 186 // |right_side|. |
| 189 int32_t left_side = (vec1_energy + vec2_energy) / 16; | 187 int32_t left_side = (vec1_energy + vec2_energy) / 16; |
| 190 int32_t right_side; | 188 int32_t right_side; |
| 191 if (background_noise_.initialized()) { | 189 if (background_noise_.initialized()) { |
| 192 right_side = background_noise_.Energy(master_channel_); | 190 right_side = background_noise_.Energy(master_channel_); |
| 193 } else { | 191 } else { |
| 194 // If noise parameters have not been estimated, use a fixed threshold. | 192 // If noise parameters have not been estimated, use a fixed threshold. |
| 195 right_side = 75000; | 193 right_side = 75000; |
| 196 } | 194 } |
| 197 int right_scale = 16 - WebRtcSpl_NormW32(right_side); | 195 int right_scale = 16 - WebRtcSpl_NormW32(right_side); |
| 198 right_scale = std::max(0, right_scale); | 196 right_scale = std::max(0, right_scale); |
| 199 left_side = left_side >> right_scale; | 197 left_side = left_side >> right_scale; |
| 200 right_side = peak_index * (right_side >> right_scale); | 198 right_side = static_cast<int32_t>(peak_index) * (right_side >> right_scale); |
|
hlundin-webrtc
2015/08/10 11:30:02
rtc::checked_cast
Peter Kasting
2015/08/17 22:49:47
While today this shouldn't be necessary as |peak_i
hlundin-webrtc
2015/08/18 07:19:18
Thanks. Future changes is what worries me sometime
| |
| 201 | 199 |
| 202 // Scale |left_side| properly before comparing with |right_side|. | 200 // Scale |left_side| properly before comparing with |right_side|. |
| 203 // (|scaling| is the scale factor before energy calculation, thus the scale | 201 // (|scaling| is the scale factor before energy calculation, thus the scale |
| 204 // factor for the energy is 2 * scaling.) | 202 // factor for the energy is 2 * scaling.) |
| 205 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) { | 203 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) { |
| 206 // Cannot scale only |left_side|, must scale |right_side| too. | 204 // Cannot scale only |left_side|, must scale |right_side| too. |
| 207 int temp_scale = WebRtcSpl_NormW32(left_side); | 205 int temp_scale = WebRtcSpl_NormW32(left_side); |
| 208 left_side = left_side << temp_scale; | 206 left_side = left_side << temp_scale; |
| 209 right_side = right_side >> (2 * scaling - temp_scale); | 207 right_side = right_side >> (2 * scaling - temp_scale); |
| 210 } else { | 208 } else { |
| 211 left_side = left_side << 2 * scaling; | 209 left_side = left_side << 2 * scaling; |
| 212 } | 210 } |
| 213 return left_side > right_side; | 211 return left_side > right_side; |
| 214 } | 212 } |
| 215 | 213 |
| 216 } // namespace webrtc | 214 } // namespace webrtc |
| OLD | NEW |