webrtc/modules/audio_coding/neteq/merge.cc - Issue 1925053002: Revert of Avoiding overflow in cross correlation in NetEq.

Side by Side Diff: webrtc/modules/audio_coding/neteq/merge.cc

Issue 1925053002: Revert of Avoiding overflow in cross correlation in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_coding/neteq/merge.h"	11 #include "webrtc/modules/audio_coding/neteq/merge.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <string.h> // memmove, memcpy, memset, size_t	14 #include <string.h> // memmove, memcpy, memset, size_t

15	15

16 #include <algorithm> // min, max	16 #include <algorithm> // min, max

17 #include <memory>	17 #include <memory>

18	18

19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

20 #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"	20 #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"

21 #include "webrtc/modules/audio_coding/neteq/cross_correlation.h"

22 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"	21 #include "webrtc/modules/audio_coding/neteq/dsp_helper.h"

23 #include "webrtc/modules/audio_coding/neteq/expand.h"	22 #include "webrtc/modules/audio_coding/neteq/expand.h"

24 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"	23 #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"

25	24

26 namespace webrtc {	25 namespace webrtc {

27	26

28 Merge::Merge(int fs_hz,	27 Merge::Merge(int fs_hz,

29 size_t num_channels,	28 size_t num_channels,

30 Expand* expand,	29 Expand* expand,

31 SyncBuffer* sync_buffer)	30 SyncBuffer* sync_buffer)

(...skipping 25 matching lines...) Expand all Loading...
57 input_vector.PushBackInterleaved(input, input_length);	56 input_vector.PushBackInterleaved(input, input_length);

58 size_t input_length_per_channel = input_vector.Size();	57 size_t input_length_per_channel = input_vector.Size();

59 assert(input_length_per_channel == input_length / num_channels_);	58 assert(input_length_per_channel == input_length / num_channels_);

60	59

61 size_t best_correlation_index = 0;	60 size_t best_correlation_index = 0;

62 size_t output_length = 0;	61 size_t output_length = 0;

63	62

64 for (size_t channel = 0; channel < num_channels_; ++channel) {	63 for (size_t channel = 0; channel < num_channels_; ++channel) {

65 int16_t* input_channel = &input_vector[channel][0];	64 int16_t* input_channel = &input_vector[channel][0];

66 int16_t* expanded_channel = &expanded_[channel][0];	65 int16_t* expanded_channel = &expanded_[channel][0];

	66 int16_t expanded_max, input_max;

67 int16_t new_mute_factor = SignalScaling(	67 int16_t new_mute_factor = SignalScaling(

68 input_channel, input_length_per_channel, expanded_channel);	68 input_channel, input_length_per_channel, expanded_channel,

	69 &expanded_max, &input_max);

69	70

70 // Adjust muting factor (product of "main" muting factor and expand muting	71 // Adjust muting factor (product of "main" muting factor and expand muting

71 // factor).	72 // factor).

72 int16_t* external_mute_factor = &external_mute_factor_array[channel];	73 int16_t* external_mute_factor = &external_mute_factor_array[channel];

73 *external_mute_factor =	74 *external_mute_factor =

74 (*external_mute_factor * expand_->MuteFactor(channel)) >> 14;	75 (*external_mute_factor * expand_->MuteFactor(channel)) >> 14;

75	76

76 // Update |external_mute_factor| if it is lower than |new_mute_factor|.	77 // Update |external_mute_factor| if it is lower than |new_mute_factor|.

77 if (new_mute_factor > *external_mute_factor) {	78 if (new_mute_factor > *external_mute_factor) {

78 *external_mute_factor = std::min(new_mute_factor,	79 *external_mute_factor = std::min(new_mute_factor,

79 static_cast<int16_t>(16384));	80 static_cast<int16_t>(16384));

80 }	81 }

81	82

82 if (channel == 0) {	83 if (channel == 0) {

83 // Downsample, correlate, and find strongest correlation period for the	84 // Downsample, correlate, and find strongest correlation period for the

84 // master (i.e., first) channel only.	85 // master (i.e., first) channel only.

85 // Downsample to 4kHz sample rate.	86 // Downsample to 4kHz sample rate.

86 Downsample(input_channel, input_length_per_channel, expanded_channel,	87 Downsample(input_channel, input_length_per_channel, expanded_channel,

87 expanded_length);	88 expanded_length);

88	89

89 // Calculate the lag of the strongest correlation period.	90 // Calculate the lag of the strongest correlation period.

90 best_correlation_index = CorrelateAndPeakSearch(	91 best_correlation_index = CorrelateAndPeakSearch(

91 old_length, input_length_per_channel, expand_period);	92 expanded_max, input_max, old_length,

	93 input_length_per_channel, expand_period);

92 }	94 }

93	95

94 static const int kTempDataSize = 3600;	96 static const int kTempDataSize = 3600;

95 int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.	97 int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.

96 int16_t* decoded_output = temp_data + best_correlation_index;	98 int16_t* decoded_output = temp_data + best_correlation_index;

97	99

98 // Mute the new decoded data if needed (and unmute it linearly).	100 // Mute the new decoded data if needed (and unmute it linearly).

99 // This is the overlapping part of expanded_signal.	101 // This is the overlapping part of expanded_signal.

100 size_t interpolation_length = std::min(	102 size_t interpolation_length = std::min(

101 kMaxCorrelationLength * fs_mult_,	103 kMaxCorrelationLength * fs_mult_,

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
195 expanded_.PushBack(expanded_temp);	197 expanded_.PushBack(expanded_temp);

196 }	198 }

197 // Trim the length to exactly |required_length|.	199 // Trim the length to exactly |required_length|.

198 expanded_.PopBack(expanded_.Size() - required_length);	200 expanded_.PopBack(expanded_.Size() - required_length);

199 }	201 }

200 assert(expanded_.Size() >= required_length);	202 assert(expanded_.Size() >= required_length);

201 return required_length;	203 return required_length;

202 }	204 }

203	205

204 int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,	206 int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,

205 const int16_t* expanded_signal) const {	207 const int16_t* expanded_signal,

	208 int16_t* expanded_max, int16_t* input_max) const {

206 // Adjust muting factor if new vector is more or less of the BGN energy.	209 // Adjust muting factor if new vector is more or less of the BGN energy.

207 const size_t mod_input_length =	210 const size_t mod_input_length =

208 std::min(static_cast<size_t>(64 * fs_mult_), input_length);	211 std::min(static_cast<size_t>(64 * fs_mult_), input_length);

209 const int16_t expanded_max =	212 *expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);

210 WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);	213 *input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);

211 const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);

212	214

213 // Calculate energy of expanded signal.	215 // Calculate energy of expanded signal.

214 // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.	216 // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.

215 int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);	217 int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);

216 int expanded_shift = 6 + log_fs_mult	218 int expanded_shift = 6 + log_fs_mult

217 - WebRtcSpl_NormW32(expanded_max * expanded_max);	219 - WebRtcSpl_NormW32(*expanded_max * *expanded_max);

218 expanded_shift = std::max(expanded_shift, 0);	220 expanded_shift = std::max(expanded_shift, 0);

219 int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,	221 int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,

220 expanded_signal,	222 expanded_signal,

221 mod_input_length,	223 mod_input_length,

222 expanded_shift);	224 expanded_shift);

223	225

224 // Calculate energy of input signal.	226 // Calculate energy of input signal.

225 int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max);	227 int input_shift = 6 + log_fs_mult -

	228 WebRtcSpl_NormW32(*input_max * *input_max);

226 input_shift = std::max(input_shift, 0);	229 input_shift = std::max(input_shift, 0);

227 int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,	230 int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,

228 mod_input_length,	231 mod_input_length,

229 input_shift);	232 input_shift);

230	233

231 // Align to the same Q-domain.	234 // Align to the same Q-domain.

232 if (input_shift > expanded_shift) {	235 if (input_shift > expanded_shift) {

233 energy_expanded = energy_expanded >> (input_shift - expanded_shift);	236 energy_expanded = energy_expanded >> (input_shift - expanded_shift);

234 } else {	237 } else {

235 energy_input = energy_input >> (expanded_shift - input_shift);	238 energy_input = energy_input >> (expanded_shift - input_shift);

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
297 sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));	300 sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));

298 } else {	301 } else {

299 WebRtcSpl_DownsampleFast(&input[signal_offset],	302 WebRtcSpl_DownsampleFast(&input[signal_offset],

300 input_length - signal_offset, input_downsampled_,	303 input_length - signal_offset, input_downsampled_,

301 kInputDownsampLength, filter_coefficients,	304 kInputDownsampLength, filter_coefficients,

302 num_coefficients, decimation_factor,	305 num_coefficients, decimation_factor,

303 kCompensateDelay);	306 kCompensateDelay);

304 }	307 }

305 }	308 }

306	309

307 size_t Merge::CorrelateAndPeakSearch(size_t start_position, size_t input_length,	310 size_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,

	311 size_t start_position, size_t input_length,

308 size_t expand_period) const {	312 size_t expand_period) const {

309 // Calculate correlation without any normalization.	313 // Calculate correlation without any normalization.

310 const size_t max_corr_length = kMaxCorrelationLength;	314 const size_t max_corr_length = kMaxCorrelationLength;

311 size_t stop_position_downsamp =	315 size_t stop_position_downsamp =

312 std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);	316 std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);

	317 int correlation_shift = 0;

	318 if (expanded_max * input_max > 26843546) {

	319 correlation_shift = 3;

	320 }

313	321

314 int32_t correlation[kMaxCorrelationLength];	322 int32_t correlation[kMaxCorrelationLength];

315 CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_,	323 WebRtcSpl_CrossCorrelation(correlation, input_downsampled_,

316 kInputDownsampLength, stop_position_downsamp, 1,	324 expanded_downsampled_, kInputDownsampLength,

317 correlation);	325 stop_position_downsamp, correlation_shift, 1);

318	326

319 // Normalize correlation to 14 bits and copy to a 16-bit array.	327 // Normalize correlation to 14 bits and copy to a 16-bit array.

320 const size_t pad_length = expand_->overlap_length() - 1;	328 const size_t pad_length = expand_->overlap_length() - 1;

321 const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;	329 const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;

322 std::unique_ptr<int16_t[]> correlation16(	330 std::unique_ptr<int16_t[]> correlation16(

323 new int16_t[correlation_buffer_size]);	331 new int16_t[correlation_buffer_size]);

324 memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));	332 memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));

325 int16_t* correlation_ptr = &correlation16[pad_length];	333 int16_t* correlation_ptr = &correlation16[pad_length];

326 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,	334 int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,

327 stop_position_downsamp);	335 stop_position_downsamp);

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
366 }	374 }

367 return best_correlation_index;	375 return best_correlation_index;

368 }	376 }

369	377

370 size_t Merge::RequiredFutureSamples() {	378 size_t Merge::RequiredFutureSamples() {

371 return fs_hz_ / 100 * num_channels_; // 10 ms.	379 return fs_hz_ / 100 * num_channels_; // 10 ms.

372 }	380 }

373	381

374	382

375 } // namespace webrtc	383 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_coding/neteq/merge.h ('k') | webrtc/modules/audio_coding/neteq/neteq.gypi » ('j') | no next file with comments »