webrtc/modules/audio_coding/neteq/merge.cc - Issue 1901633002: Adding 120 ms frame length support in NetEq.

Side by Side Diff: webrtc/modules/audio_coding/neteq/merge.cc

Issue 1901633002: Adding 120 ms frame length support in NetEq. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: fixing two errors Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 21 matching lines...) Expand all Loading...
32 : fs_hz_(fs_hz),	32 : fs_hz_(fs_hz),

33 num_channels_(num_channels),	33 num_channels_(num_channels),

34 fs_mult_(fs_hz_ / 8000),	34 fs_mult_(fs_hz_ / 8000),

35 timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),	35 timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),

36 expand_(expand),	36 expand_(expand),

37 sync_buffer_(sync_buffer),	37 sync_buffer_(sync_buffer),

38 expanded_(num_channels_) {	38 expanded_(num_channels_) {

39 assert(num_channels_ > 0);	39 assert(num_channels_ > 0);

40 }	40 }

41	41

	42 Merge::~Merge() = default;

	43

42 size_t Merge::Process(int16_t* input, size_t input_length,	44 size_t Merge::Process(int16_t* input, size_t input_length,

43 int16_t* external_mute_factor_array,	45 int16_t* external_mute_factor_array,

44 AudioMultiVector* output) {	46 AudioMultiVector* output) {

45 // TODO(hlundin): Change to an enumerator and skip assert.	47 // TODO(hlundin): Change to an enumerator and skip assert.

46 assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||	48 assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||

47 fs_hz_ == 48000);	49 fs_hz_ == 48000);

48 assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.	50 assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.

49	51

50 size_t old_length;	52 size_t old_length;

51 size_t expand_period;	53 size_t expand_period;

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
84 // master (i.e., first) channel only.	86 // master (i.e., first) channel only.

85 // Downsample to 4kHz sample rate.	87 // Downsample to 4kHz sample rate.

86 Downsample(input_channel, input_length_per_channel, expanded_channel,	88 Downsample(input_channel, input_length_per_channel, expanded_channel,

87 expanded_length);	89 expanded_length);

88	90

89 // Calculate the lag of the strongest correlation period.	91 // Calculate the lag of the strongest correlation period.

90 best_correlation_index = CorrelateAndPeakSearch(	92 best_correlation_index = CorrelateAndPeakSearch(

91 old_length, input_length_per_channel, expand_period);	93 old_length, input_length_per_channel, expand_period);

92 }	94 }

93	95

94 static const int kTempDataSize = 3600;	96 temp_data_.resize(input_length_per_channel + best_correlation_index);

95 int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.	97 int16_t* decoded_output = temp_data_.data() + best_correlation_index;

96 int16_t* decoded_output = temp_data + best_correlation_index;

97	98

98 // Mute the new decoded data if needed (and unmute it linearly).	99 // Mute the new decoded data if needed (and unmute it linearly).

99 // This is the overlapping part of expanded_signal.	100 // This is the overlapping part of expanded_signal.

100 size_t interpolation_length = std::min(	101 size_t interpolation_length = std::min(

101 kMaxCorrelationLength * fs_mult_,	102 kMaxCorrelationLength * fs_mult_,

102 expanded_length - best_correlation_index);	103 expanded_length - best_correlation_index);

103 interpolation_length = std::min(interpolation_length,	104 interpolation_length = std::min(interpolation_length,

104 input_length_per_channel);	105 input_length_per_channel);

105 if (*external_mute_factor < 16384) {	106 if (*external_mute_factor < 16384) {

106 // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,	107 // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,

(...skipping 13 matching lines...) Expand all Loading...
120 memmove(	121 memmove(

121 &decoded_output[interpolation_length],	122 &decoded_output[interpolation_length],

122 &input_channel[interpolation_length],	123 &input_channel[interpolation_length],

123 sizeof(int16_t) * (input_length_per_channel - interpolation_length));	124 sizeof(int16_t) * (input_length_per_channel - interpolation_length));

124 }	125 }

125	126

126 // Do overlap and mix linearly.	127 // Do overlap and mix linearly.

127 int16_t increment =	128 int16_t increment =

128 static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.	129 static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.

129 int16_t mute_factor = 16384 - increment;	130 int16_t mute_factor = 16384 - increment;

130 memmove(temp_data, expanded_channel,	131 memmove(temp_data_.data(), expanded_channel,

131 sizeof(int16_t) * best_correlation_index);	132 sizeof(int16_t) * best_correlation_index);

132 DspHelper::CrossFade(&expanded_channel[best_correlation_index],	133 DspHelper::CrossFade(&expanded_channel[best_correlation_index],

133 input_channel, interpolation_length,	134 input_channel, interpolation_length,

134 &mute_factor, increment, decoded_output);	135 &mute_factor, increment, decoded_output);

135	136

136 output_length = best_correlation_index + input_length_per_channel;	137 output_length = best_correlation_index + input_length_per_channel;

137 if (channel == 0) {	138 if (channel == 0) {

138 assert(output->Empty()); // Output should be empty at this point.	139 assert(output->Empty()); // Output should be empty at this point.

139 output->AssertSize(output_length);	140 output->AssertSize(output_length);

140 } else {	141 } else {

141 assert(output->Size() == output_length);	142 assert(output->Size() == output_length);

142 }	143 }

143 memcpy(&(*output)[channel][0], temp_data,	144 memcpy(&(*output)[channel][0], temp_data_.data(),

144 sizeof(temp_data[0]) * output_length);	145 sizeof(temp_data_[0]) * output_length);

145 }	146 }

146	147

147 // Copy back the first part of the data to |sync_buffer_| and remove it from	148 // Copy back the first part of the data to |sync_buffer_| and remove it from

148 // |output|.	149 // |output|.

149 sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());	150 sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());

150 output->PopFront(old_length);	151 output->PopFront(old_length);

151	152

152 // Return new added length. |old_length| samples were borrowed from	153 // Return new added length. |old_length| samples were borrowed from

153 // |sync_buffer_|.	154 // |sync_buffer_|.

154 return output_length - old_length;	155 return output_length - old_length;

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
201 return required_length;	202 return required_length;

202 }	203 }

203	204

204 int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,	205 int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,

205 const int16_t* expanded_signal) const {	206 const int16_t* expanded_signal) const {

206 // Adjust muting factor if new vector is more or less of the BGN energy.	207 // Adjust muting factor if new vector is more or less of the BGN energy.

207 const size_t mod_input_length =	208 const size_t mod_input_length =

208 std::min(static_cast<size_t>(64 * fs_mult_), input_length);	209 std::min(static_cast<size_t>(64 * fs_mult_), input_length);

209 const int16_t expanded_max =	210 const int16_t expanded_max =

210 WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);	211 WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);

211 const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);	212 int32_t factor = (expanded_max * expanded_max) /
	minyue-webrtc 2016/05/02 10:44:52: first fix: old code does give enough shifts, since log_fs_mult is not right for 48000 Hz
	hlundin-webrtc 2016/05/02 11:28:15: Acknowledged.
	minyue-webrtc 2016/05/02 11:41:12: ok. but I ought to modify my sentence as "old code does NOT give enough shifts". I think you have realized it.
212	213 (std::numeric_limits<int32_t>::max() /

213 // Calculate energy of expanded signal.	214 static_cast<int32_t>(mod_input_length));

214 // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.	215 const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);

215 int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);

216 int expanded_shift = 6 + log_fs_mult

217 - WebRtcSpl_NormW32(expanded_max * expanded_max);

218 expanded_shift = std::max(expanded_shift, 0);

219 int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,	216 int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,

220 expanded_signal,	217 expanded_signal,

221 mod_input_length,	218 mod_input_length,

222 expanded_shift);	219 expanded_shift);

223	220

224 // Calculate energy of input signal.	221 // Calculate energy of input signal.

225 int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max);	222 const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);

226 input_shift = std::max(input_shift, 0);	223 factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /

	224 static_cast<int32_t>(mod_input_length));

	225 const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);

227 int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,	226 int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,

228 mod_input_length,	227 mod_input_length,

229 input_shift);	228 input_shift);

230	229

231 // Align to the same Q-domain.	230 // Align to the same Q-domain.

232 if (input_shift > expanded_shift) {	231 if (input_shift > expanded_shift) {

233 energy_expanded = energy_expanded >> (input_shift - expanded_shift);	232 energy_expanded = energy_expanded >> (input_shift - expanded_shift);

234 } else {	233 } else {

235 energy_input = energy_input >> (expanded_shift - input_shift);	234 energy_input = energy_input >> (expanded_shift - input_shift);

236 }	235 }

(...skipping 129 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
366 }	365 }

367 return best_correlation_index;	366 return best_correlation_index;

368 }	367 }

369	368

370 size_t Merge::RequiredFutureSamples() {	369 size_t Merge::RequiredFutureSamples() {

371 return fs_hz_ / 100 * num_channels_; // 10 ms.	370 return fs_hz_ / 100 * num_channels_; // 10 ms.

372 }	371 }

373	372

374	373

375 } // namespace webrtc	374 } // namespace webrtc

OLD	NEW