webrtc/modules/audio_coding/neteq/time_stretch.cc - Issue 1228843002: Update audio code to use size_t more correctly,

Side by Side Diff: webrtc/modules/audio_coding/neteq/time_stretch.cc

Issue 1228843002: Update audio code to use size_t more correctly, (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Review comments Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h"	11 #include "webrtc/modules/audio_coding/neteq/time_stretch.h"

12	12

13 #include <algorithm> // min, max	13 #include <algorithm> // min, max

14	14

	15 #include "webrtc/base/safe_conversions.h"

15 #include "webrtc/base/scoped_ptr.h"	16 #include "webrtc/base/scoped_ptr.h"

16 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	17 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

17 #include "webrtc/modules/audio_coding/neteq/background_noise.h"	18 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

18 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	19 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

19	20

20 namespace webrtc {	21 namespace webrtc {

21	22

22 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,	23 TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,

23 size_t input_len,	24 size_t input_len,

24 bool fast_mode,	25 bool fast_mode,

25 AudioMultiVector* output,	26 AudioMultiVector* output,

26 int16_t* length_change_samples) {	27 size_t* length_change_samples) {

27 // Pre-calculate common multiplication with \|fs_mult_\|.	28 // Pre-calculate common multiplication with \|fs_mult_\|.

28 int fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms.	29 size_t fs_mult_120 =

	30 static_cast<size_t>(fs_mult_ * 120); // Corresponds to 15 ms.

29	31

30 const int16_t* signal;	32 const int16_t* signal;

31 rtc::scoped_ptr<int16_t[]> signal_array;	33 rtc::scoped_ptr<int16_t[]> signal_array;

32 size_t signal_len;	34 size_t signal_len;

33 if (num_channels_ == 1) {	35 if (num_channels_ == 1) {

34 signal = input;	36 signal = input;

35 signal_len = input_len;	37 signal_len = input_len;

36 } else {	38 } else {

37 // We want \|signal\| to be only the first channel of \|input\|, which is	39 // We want \|signal\| to be only the first channel of \|input\|, which is

38 // interleaved. Thus, we take the first sample, skip forward \|num_channels\|	40 // interleaved. Thus, we take the first sample, skip forward \|num_channels\|

39 // samples, and continue like that.	41 // samples, and continue like that.

40 signal_len = input_len / num_channels_;	42 signal_len = input_len / num_channels_;

41 signal_array.reset(new int16_t[signal_len]);	43 signal_array.reset(new int16_t[signal_len]);

42 signal = signal_array.get();	44 signal = signal_array.get();

43 size_t j = master_channel_;	45 size_t j = master_channel_;

44 for (size_t i = 0; i < signal_len; ++i) {	46 for (size_t i = 0; i < signal_len; ++i) {

45 signal_array[i] = input[j];	47 signal_array[i] = input[j];

46 j += num_channels_;	48 j += num_channels_;

47 }	49 }

48 }	50 }

49	51

50 // Find maximum absolute value of input signal.	52 // Find maximum absolute value of input signal.

51 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal,	53 max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len);

52 static_cast<int>(signal_len));

53	54

54 // Downsample to 4 kHz sample rate and calculate auto-correlation.	55 // Downsample to 4 kHz sample rate and calculate auto-correlation.

55 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,	56 DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,

56 sample_rate_hz_, true /* compensate delay*/,	57 sample_rate_hz_, true /* compensate delay*/,

57 downsampled_input_);	58 downsampled_input_);

58 AutoCorrelation();	59 AutoCorrelation();

59	60

60 // Find the strongest correlation peak.	61 // Find the strongest correlation peak.

61 static const int kNumPeaks = 1;	62 static const size_t kNumPeaks = 1;

62 int peak_index;	63 size_t peak_index;

63 int16_t peak_value;	64 int16_t peak_value;

64 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,	65 DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,

65 fs_mult_, &peak_index, &peak_value);	66 fs_mult_, &peak_index, &peak_value);

66 // Assert that \|peak_index\| stays within boundaries.	67 // Assert that \|peak_index\| stays within boundaries.

67 assert(peak_index >= 0);

68 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_);	68 assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_);

69	69

70 // Compensate peak_index for displaced starting position. The displacement	70 // Compensate peak_index for displaced starting position. The displacement

71 // happens in AutoCorrelation(). Here, \|kMinLag\| is in the down-sampled 4 kHz	71 // happens in AutoCorrelation(). Here, \|kMinLag\| is in the down-sampled 4 kHz

72 // domain, while the \|peak_index\| is in the original sample rate; hence, the	72 // domain, while the \|peak_index\| is in the original sample rate; hence, the

73 // multiplication by fs_mult_ * 2.	73 // multiplication by fs_mult_ * 2.

74 peak_index += kMinLag * fs_mult_ * 2;	74 peak_index += kMinLag * fs_mult_ * 2;

75 // Assert that \|peak_index\| stays within boundaries.	75 // Assert that \|peak_index\| stays within boundaries.

76 assert(peak_index >= 20 * fs_mult_);	76 assert(peak_index >= static_cast<size_t>(20 * fs_mult_));

77 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);	77 assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);

78	78

79 // Calculate scaling to ensure that \|peak_index\| samples can be square-summed	79 // Calculate scaling to ensure that \|peak_index\| samples can be square-summed

80 // without overflowing.	80 // without overflowing.

81 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -	81 int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -

82 WebRtcSpl_NormW32(peak_index);	82 WebRtcSpl_NormW32(static_cast<int32_t>(peak_index));

83 scaling = std::max(0, scaling);	83 scaling = std::max(0, scaling);

84	84

85 // \|vec1\| starts at 15 ms minus one pitch period.	85 // \|vec1\| starts at 15 ms minus one pitch period.

86 const int16_t* vec1 = &signal[fs_mult_120 - peak_index];	86 const int16_t* vec1 = &signal[fs_mult_120 - peak_index];

87 // \|vec2\| start at 15 ms.	87 // \|vec2\| start at 15 ms.

88 const int16_t* vec2 = &signal[fs_mult_120];	88 const int16_t* vec2 = &signal[fs_mult_120];

89 // Calculate energies for \|vec1\| and \|vec2\|, assuming they both contain	89 // Calculate energies for \|vec1\| and \|vec2\|, assuming they both contain

90 // \|peak_index\| samples.	90 // \|peak_index\| samples.

91 int32_t vec1_energy =	91 int32_t vec1_energy =

92 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);	92 WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling);

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
170 kCorrelationLen, kMaxLag - kMinLag, scaling, -1);	170 kCorrelationLen, kMaxLag - kMinLag, scaling, -1);

171	171

172 // Normalize correlation to 14 bits and write to \|auto_correlation_\|.	172 // Normalize correlation to 14 bits and write to \|auto_correlation_\|.

173 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);	173 int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);

174 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));	174 scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));

175 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,	175 WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,

176 auto_corr, scaling);	176 auto_corr, scaling);

177 }	177 }

178	178

179 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,	179 bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,

180 int peak_index, int scaling) const {	180 size_t peak_index, int scaling) const {

181 // Check if the signal seems to be active speech or not (simple VAD).	181 // Check if the signal seems to be active speech or not (simple VAD).

182 // If (vec1_energy + vec2_energy) / (2 * peak_index) <=	182 // If (vec1_energy + vec2_energy) / (2 * peak_index) <=

183 // 8 * background_noise_energy, then we say that the signal contains no	183 // 8 * background_noise_energy, then we say that the signal contains no

184 // active speech.	184 // active speech.

185 // Rewrite the inequality as:	185 // Rewrite the inequality as:

186 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.	186 // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy.

187 // The two sides of the inequality will be denoted \|left_side\| and	187 // The two sides of the inequality will be denoted \|left_side\| and

188 // \|right_side\|.	188 // \|right_side\|.

189 int32_t left_side = (vec1_energy + vec2_energy) / 16;	189 int32_t left_side = (vec1_energy + vec2_energy) / 16;

190 int32_t right_side;	190 int32_t right_side;

191 if (background_noise_.initialized()) {	191 if (background_noise_.initialized()) {

192 right_side = background_noise_.Energy(master_channel_);	192 right_side = background_noise_.Energy(master_channel_);

193 } else {	193 } else {

194 // If noise parameters have not been estimated, use a fixed threshold.	194 // If noise parameters have not been estimated, use a fixed threshold.

195 right_side = 75000;	195 right_side = 75000;

196 }	196 }

197 int right_scale = 16 - WebRtcSpl_NormW32(right_side);	197 int right_scale = 16 - WebRtcSpl_NormW32(right_side);

198 right_scale = std::max(0, right_scale);	198 right_scale = std::max(0, right_scale);

199 left_side = left_side >> right_scale;	199 left_side = left_side >> right_scale;

200 right_side = peak_index * (right_side >> right_scale);	200 right_side =

	201 rtc::checked_cast<int32_t>(peak_index) * (right_side >> right_scale);

201	202

202 // Scale \|left_side\| properly before comparing with \|right_side\|.	203 // Scale \|left_side\| properly before comparing with \|right_side\|.

203 // (\|scaling\| is the scale factor before energy calculation, thus the scale	204 // (\|scaling\| is the scale factor before energy calculation, thus the scale

204 // factor for the energy is 2 * scaling.)	205 // factor for the energy is 2 * scaling.)

205 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {	206 if (WebRtcSpl_NormW32(left_side) < 2 * scaling) {

206 // Cannot scale only \|left_side\|, must scale \|right_side\| too.	207 // Cannot scale only \|left_side\|, must scale \|right_side\| too.

207 int temp_scale = WebRtcSpl_NormW32(left_side);	208 int temp_scale = WebRtcSpl_NormW32(left_side);

208 left_side = left_side << temp_scale;	209 left_side = left_side << temp_scale;

209 right_side = right_side >> (2 * scaling - temp_scale);	210 right_side = right_side >> (2 * scaling - temp_scale);

210 } else {	211 } else {

211 left_side = left_side << 2 * scaling;	212 left_side = left_side << 2 * scaling;

212 }	213 }

213 return left_side > right_side;	214 return left_side > right_side;

214 }	215 }

215	216

216 } // namespace webrtc	217 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/time_stretch.h ('k') | webrtc/modules/audio_coding/neteq/time_stretch_unittest.cc » ('j') | no next file with comments »