Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h" | 11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h" |
| 12 | 12 |
| 13 #include <algorithm> // min, max | 13 #include <algorithm> // min, max |
| 14 | 14 |
| 15 #include "webrtc/base/scoped_ptr.h" | 15 #include "webrtc/base/scoped_ptr.h" |
| 16 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" | 16 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 17 #include "webrtc/modules/audio_coding/neteq/background_noise.h" | 17 #include "webrtc/modules/audio_coding/neteq/background_noise.h" |
| 18 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" | 18 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" |
| 19 | 19 |
| 20 namespace webrtc { | 20 namespace webrtc { |
| 21 | 21 |
| 22 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input, | 22 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input, |
| 23 size_t input_len, | 23 size_t input_len, |
| 24 bool fast_mode, | 24 bool fast_mode, |
| 25 AudioMultiVector* output, | 25 AudioMultiVector* output, |
| 26 int16_t* length_change_samples) { | 26 size_t* length_change_samples) { |
| 27 // Pre-calculate common multiplication with |fs_mult_|. | 27 // Pre-calculate common multiplication with |fs_mult_|. |
| 28 int fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms. | 28 size_t fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms. |
|
hlundin-webrtc
2015/08/10 11:30:02
int
| |
| 29 | 29 |
| 30 const int16_t* signal; | 30 const int16_t* signal; |
| 31 rtc::scoped_ptr<int16_t[]> signal_array; | 31 rtc::scoped_ptr<int16_t[]> signal_array; |
| 32 size_t signal_len; | 32 size_t signal_len; |
| 33 if (num_channels_ == 1) { | 33 if (num_channels_ == 1) { |
| 34 signal = input; | 34 signal = input; |
| 35 signal_len = input_len; | 35 signal_len = input_len; |
| 36 } else { | 36 } else { |
| 37 // We want |signal| to be only the first channel of |input|, which is | 37 // We want |signal| to be only the first channel of |input|, which is |
| 38 // interleaved. Thus, we take the first sample, skip forward |num_channels| | 38 // interleaved. Thus, we take the first sample, skip forward |num_channels| |
| 39 // samples, and continue like that. | 39 // samples, and continue like that. |
| 40 signal_len = input_len / num_channels_; | 40 signal_len = input_len / num_channels_; |
| 41 signal_array.reset(new int16_t[signal_len]); | 41 signal_array.reset(new int16_t[signal_len]); |
| 42 signal = signal_array.get(); | 42 signal = signal_array.get(); |
| 43 size_t j = master_channel_; | 43 size_t j = master_channel_; |
| 44 for (size_t i = 0; i < signal_len; ++i) { | 44 for (size_t i = 0; i < signal_len; ++i) { |
| 45 signal_array[i] = input[j]; | 45 signal_array[i] = input[j]; |
| 46 j += num_channels_; | 46 j += num_channels_; |
| 47 } | 47 } |
| 48 } | 48 } |
| 49 | 49 |
| 50 // Find maximum absolute value of input signal. | 50 // Find maximum absolute value of input signal. |
| 51 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, | 51 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len); |
| 52 static_cast<int>(signal_len)); | |
| 53 | 52 |
| 54 // Downsample to 4 kHz sample rate and calculate auto-correlation. | 53 // Downsample to 4 kHz sample rate and calculate auto-correlation. |
| 55 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen, | 54 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen, |
| 56 sample_rate_hz_, true /* compensate delay*/, | 55 sample_rate_hz_, true /* compensate delay*/, |
| 57 downsampled_input_); | 56 downsampled_input_); |
| 58 AutoCorrelation(); | 57 AutoCorrelation(); |
| 59 | 58 |
| 60 // Find the strongest correlation peak. | 59 // Find the strongest correlation peak. |
| 61 static const int kNumPeaks = 1; | 60 static const size_t kNumPeaks = 1; |
| 62 int peak_index; | 61 size_t peak_index; |
| 63 int16_t peak_value; | 62 int16_t peak_value; |
| 64 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks, | 63 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks, |
| 65 fs_mult_, &peak_index, &peak_value); | 64 fs_mult_, &peak_index, &peak_value); |
| 66 // Assert that |peak_index| stays within boundaries. | 65 // Assert that |peak_index| stays within boundaries. |
| 67 assert(peak_index >= 0); | |
| 68 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_); | 66 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_); |
| 69 | 67 |
| 70 // Compensate peak_index for displaced starting position. The displacement | 68 // Compensate peak_index for displaced starting position. The displacement |
| 71 // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz | 69 // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz |
| 72 // domain, while the |peak_index| is in the original sample rate; hence, the | 70 // domain, while the |peak_index| is in the original sample rate; hence, the |
| 73 // multiplication by fs_mult_ * 2. | 71 // multiplication by fs_mult_ * 2. |
| 74 peak_index += kMinLag * fs_mult_ * 2; | 72 peak_index += kMinLag * fs_mult_ * 2; |
| 75 // Assert that |peak_index| stays within boundaries. | 73 // Assert that |peak_index| stays within boundaries. |
| 76 assert(peak_index >= 20 * fs_mult_); | 74 assert(peak_index >= 20 * fs_mult_); |
| 77 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_); | 75 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_); |
| 78 | 76 |
| 79 // Calculate scaling to ensure that |peak_index| samples can be square-summed | 77 // Calculate scaling to ensure that |peak_index| samples can be square-summed |
| 80 // without overflowing. | 78 // without overflowing. |
| 81 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) - | 79 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) - |
| 82 WebRtcSpl_NormW32(peak_index); | 80 WebRtcSpl_NormW32(static_cast<int32_t>(peak_index)); |
|
hlundin-webrtc
2015/08/10 11:30:02
rtc::checked_cast
Peter Kasting
2015/08/17 22:49:47
This shouldn't be necessary, as the asserts above
hlundin-webrtc
2015/08/18 07:19:18
Acknowledged.
| |
| 83 scaling = std::max(0, scaling); | 81 scaling = std::max(0, scaling); |
| 84 | 82 |
| 85 // |vec1| starts at 15 ms minus one pitch period. | 83 // |vec1| starts at 15 ms minus one pitch period. |
| 86 const int16_t* vec1 = &signal[fs_mult_120 - peak_index]; | 84 const int16_t* vec1 = &signal[fs_mult_120 - peak_index]; |
| 87 // |vec2| start at 15 ms. | 85 // |vec2| start at 15 ms. |
| 88 const int16_t* vec2 = &signal[fs_mult_120]; | 86 const int16_t* vec2 = &signal[fs_mult_120]; |
| 89 // Calculate energies for |vec1| and |vec2|, assuming they both contain | 87 // Calculate energies for |vec1| and |vec2|, assuming they both contain |
| 90 // |peak_index| samples. | 88 // |peak_index| samples. |
| 91 int32_t vec1_energy = | 89 int32_t vec1_energy = |
| 92 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling); | 90 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling); |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 170 kCorrelationLen, kMaxLag - kMinLag, scaling, -1); | 168 kCorrelationLen, kMaxLag - kMinLag, scaling, -1); |
| 171 | 169 |
| 172 // Normalize correlation to 14 bits and write to |auto_correlation_|. | 170 // Normalize correlation to 14 bits and write to |auto_correlation_|. |
| 173 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); | 171 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); |
| 174 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); | 172 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); |
| 175 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, | 173 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, |
| 176 auto_corr, scaling); | 174 auto_corr, scaling); |
| 177 } | 175 } |
| 178 | 176 |
| 179 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, | 177 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, |
| 180 int peak_index, int scaling) const { | 178 size_t peak_index, int scaling) const { |
| 181 // Check if the signal seems to be active speech or not (simple VAD). | 179 // Check if the signal seems to be active speech or not (simple VAD). |
| 182 // If (vec1_energy + vec2_energy) / (2 * peak_index) <= | 180 // If (vec1_energy + vec2_energy) / (2 * peak_index) <= |
| 183 // 8 * background_noise_energy, then we say that the signal contains no | 181 // 8 * background_noise_energy, then we say that the signal contains no |
| 184 // active speech. | 182 // active speech. |
| 185 // Rewrite the inequality as: | 183 // Rewrite the inequality as: |
| 186 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy. | 184 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy. |
| 187 // The two sides of the inequality will be denoted |left_side| and | 185 // The two sides of the inequality will be denoted |left_side| and |
| 188 // |right_side|. | 186 // |right_side|. |
| 189 int32_t left_side = (vec1_energy + vec2_energy) / 16; | 187 int32_t left_side = (vec1_energy + vec2_energy) / 16; |
| 190 int32_t right_side; | 188 int32_t right_side; |
| 191 if (background_noise_.initialized()) { | 189 if (background_noise_.initialized()) { |
| 192 right_side = background_noise_.Energy(master_channel_); | 190 right_side = background_noise_.Energy(master_channel_); |
| 193 } else { | 191 } else { |
| 194 // If noise parameters have not been estimated, use a fixed threshold. | 192 // If noise parameters have not been estimated, use a fixed threshold. |
| 195 right_side = 75000; | 193 right_side = 75000; |
| 196 } | 194 } |
| 197 int right_scale = 16 - WebRtcSpl_NormW32(right_side); | 195 int right_scale = 16 - WebRtcSpl_NormW32(right_side); |
| 198 right_scale = std::max(0, right_scale); | 196 right_scale = std::max(0, right_scale); |
| 199 left_side = left_side >> right_scale; | 197 left_side = left_side >> right_scale; |
| 200 right_side = peak_index * (right_side >> right_scale); | 198 right_side = static_cast<int32_t>(peak_index) * (right_side >> right_scale); |
|
hlundin-webrtc
2015/08/10 11:30:02
rtc::checked_cast
Peter Kasting
2015/08/17 22:49:47
While today this shouldn't be necessary as |peak_i
hlundin-webrtc
2015/08/18 07:19:18
Thanks. Future changes is what worries me sometime
| |
| 201 | 199 |
| 202 // Scale |left_side| properly before comparing with |right_side|. | 200 // Scale |left_side| properly before comparing with |right_side|. |
| 203 // (|scaling| is the scale factor before energy calculation, thus the scale | 201 // (|scaling| is the scale factor before energy calculation, thus the scale |
| 204 // factor for the energy is 2 * scaling.) | 202 // factor for the energy is 2 * scaling.) |
| 205 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) { | 203 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) { |
| 206 // Cannot scale only |left_side|, must scale |right_side| too. | 204 // Cannot scale only |left_side|, must scale |right_side| too. |
| 207 int temp_scale = WebRtcSpl_NormW32(left_side); | 205 int temp_scale = WebRtcSpl_NormW32(left_side); |
| 208 left_side = left_side << temp_scale; | 206 left_side = left_side << temp_scale; |
| 209 right_side = right_side >> (2 * scaling - temp_scale); | 207 right_side = right_side >> (2 * scaling - temp_scale); |
| 210 } else { | 208 } else { |
| 211 left_side = left_side << 2 * scaling; | 209 left_side = left_side << 2 * scaling; |
| 212 } | 210 } |
| 213 return left_side > right_side; | 211 return left_side > right_side; |
| 214 } | 212 } |
| 215 | 213 |
| 216 } // namespace webrtc | 214 } // namespace webrtc |
| OLD | NEW |