webrtc/modules/audio_processing/audio_buffer.cc - Issue 1226093007: Allow more than 2 input channels in AudioProcessing.

Side by Side Diff: webrtc/modules/audio_processing/audio_buffer.cc

Issue 1226093007: Allow more than 2 input channels in AudioProcessing. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Change ProcessStream interface Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/common_audio/include/audio_util.h ('K') | « webrtc/modules/audio_processing/audio_buffer.h ('k') | webrtc/modules/audio_processing/audio_processing_impl.h » ('j') | webrtc/modules/audio_processing/include/audio_processing.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/audio_buffer.h"	11 #include "webrtc/modules/audio_processing/audio_buffer.h"

12	12

13 #include "webrtc/common_audio/include/audio_util.h"	13 #include "webrtc/common_audio/include/audio_util.h"

14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"	14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"

15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

16 #include "webrtc/common_audio/channel_buffer.h"	16 #include "webrtc/common_audio/channel_buffer.h"

17 #include "webrtc/modules/audio_processing/common.h"	17 #include "webrtc/modules/audio_processing/common.h"

18	18

19 namespace webrtc {	19 namespace webrtc {

20 namespace {	20 namespace {

21	21

22 const int kSamplesPer16kHzChannel = 160;	22 const int kSamplesPer16kHzChannel = 160;

23 const int kSamplesPer32kHzChannel = 320;	23 const int kSamplesPer32kHzChannel = 320;

24 const int kSamplesPer48kHzChannel = 480;	24 const int kSamplesPer48kHzChannel = 480;

25	25

26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {	26 int KeyboardChannelIndex(const StreamConfig& stream_config) {

27 switch (layout) {	27 if (!stream_config.has_keyboard()) {

28 case AudioProcessing::kMono:	28 assert(false);

29 case AudioProcessing::kStereo:	29 return -1;

30 return false;

31 case AudioProcessing::kMonoAndKeyboard:

32 case AudioProcessing::kStereoAndKeyboard:

33 return true;

34 }	30 }

35 assert(false);

36 return false;

37 }

38	31

39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {	32 switch (stream_config.num_channels()) {
	Andrew MacDonald 2015/07/22 22:47:21 I see that this is identical to the previous behav I see that this is identical to the previous behavior, but why not make it generic? return stream_config.num_channels(); Also, does the num_channels count really not include the keyboard channel? That seems counter-intuitive to me, but in any case should be documented in StreamConfig. mgraczyk 2015/07/23 00:16:54 Changed to be generic. I agree it makes sense for Show quoted text On 2015/07/22 22:47:21, andrew wrote: > I see that this is identical to the previous behavior, but why not make it > generic? > > return stream_config.num_channels(); > > Also, does the num_channels count really not include the keyboard channel? That > seems counter-intuitive to me, but in any case should be documented in > StreamConfig. Changed to be generic. I agree it makes sense for "num_channels" to match the number of channels passed by the caller. However, I think it is simpler if num_channels does not include the keyboard channel for a few reasons. First, everywhere we currently use num_channels(), we assume that the channel count does not include the keyboard. I think that indicates that the more natural usage is to have audio channel count and keyboard be distinct. Second, clients will have to specify "has_keyboard" anyway when there is a keyboard channel. It seems strange for them to need to indicate information about the keyboard channel in two places. Third, am I right in assuming that the output and reverse streams will never have a keyboard? If only the input stream has one, maybe it's best to make that the special case. Lastly, what should AudioProcessing::num_input_channels, AudioProcessing::num_output_channels, etc return? The number of channels including the keyboard or excluding? I believe that before this CL they returned the number of channels excluding the keyboard, so it seems inconsistent to ask clients to pass in one number yet return another from a method with the same name. 
What do you think? Andrew MacDonald 2015/07/23 00:49:06 That's right. Show quoted text On 2015/07/23 00:16:54, mgraczyk wrote: > On 2015/07/22 22:47:21, andrew wrote: > > I see that this is identical to the previous behavior, but why not make it > > generic? > > > > return stream_config.num_channels(); > > > > Also, does the num_channels count really not include the keyboard channel? > That > > seems counter-intuitive to me, but in any case should be documented in > > StreamConfig. > > Changed to be generic. > > I agree it makes sense for "num_channels" to match the number of channels passed > by the caller. However, I think it is simpler if num_channels does not include > the keyboard channel for a few reasons. > > First, everywhere we currently use num_channels(), we assume that the channel count > does not include the keyboard. I think that indicates that the more natural > usage is to have audio channel count and keyboard be distinct. > > Second, clients will have to specify "has_keyboard" anyway when there is a > keyboard channel. It seems strange for them to need to indicate information > about the keyboard channel in two places. > > Third, am I right in assuming that the output and reverse streams will never > have a keyboard? If only the input stream has one, maybe it's best to make that > the special case. That's right. Show quoted text > > Lastly, what should AudioProcessing::num_input_channels, > AudioProcessing::num_output_channels, etc return? The number of channels > including the keyboard or excluding? I believe that before this CL they > returned the number of channels excluding the keyboard, so it seems inconsistent > to ask clients to pass in one number yet return another from a method with the > same name. > > What do you think? I agree with you. The last two arguments are particularly persuasive.
40 switch (layout) {	33 case 1:

41 case AudioProcessing::kMono:

42 case AudioProcessing::kStereo:

43 assert(false);

44 return -1;

45 case AudioProcessing::kMonoAndKeyboard:

46 return 1;	34 return 1;

47 case AudioProcessing::kStereoAndKeyboard:	35 case 2:

48 return 2;	36 return 2;

49 }	37 }

50 assert(false);	38 assert(false);

51 return -1;	39 return -1;

52 }	40 }

53	41

54 template <typename T>

55 void StereoToMono(const T* left, const T* right, T* out,

56 int num_frames) {

57 for (int i = 0; i < num_frames; ++i)

58 out[i] = (left[i] + right[i]) / 2;

59 }

60

61 int NumBandsFromSamplesPerChannel(int num_frames) {	42 int NumBandsFromSamplesPerChannel(int num_frames) {

62 int num_bands = 1;	43 int num_bands = 1;

63 if (num_frames == kSamplesPer32kHzChannel ||	44 if (num_frames == kSamplesPer32kHzChannel ||

64 num_frames == kSamplesPer48kHzChannel) {	45 num_frames == kSamplesPer48kHzChannel) {

65 num_bands = rtc::CheckedDivExact(num_frames,	46 num_bands = rtc::CheckedDivExact(num_frames,

66 static_cast<int>(kSamplesPer16kHzChannel));	47 static_cast<int>(kSamplesPer16kHzChannel));

67 }	48 }

68 return num_bands;	49 return num_bands;

69 }	50 }

70	51

(...skipping 13 matching lines...) Expand all Loading...
84 num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),	65 num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),

85 num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),	66 num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),

86 mixed_low_pass_valid_(false),	67 mixed_low_pass_valid_(false),

87 reference_copied_(false),	68 reference_copied_(false),

88 activity_(AudioFrame::kVadUnknown),	69 activity_(AudioFrame::kVadUnknown),

89 keyboard_data_(NULL),	70 keyboard_data_(NULL),

90 data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {	71 data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {

91 assert(input_num_frames_ > 0);	72 assert(input_num_frames_ > 0);

92 assert(proc_num_frames_ > 0);	73 assert(proc_num_frames_ > 0);

93 assert(output_num_frames_ > 0);	74 assert(output_num_frames_ > 0);

94 assert(num_input_channels_ > 0 && num_input_channels_ <= 2);	75 assert(num_input_channels_ > 0);

95 assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);	76 assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);

96	77

97 if (input_num_frames_ != proc_num_frames_ ||	78 if (input_num_frames_ != proc_num_frames_ ||

98 output_num_frames_ != proc_num_frames_) {	79 output_num_frames_ != proc_num_frames_) {

99 // Create an intermediate buffer for resampling.	80 // Create an intermediate buffer for resampling.

100 process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,	81 process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,

101 num_proc_channels_));	82 num_proc_channels_));

102	83

103 if (input_num_frames_ != proc_num_frames_) {	84 if (input_num_frames_ != proc_num_frames_) {

104 for (int i = 0; i < num_proc_channels_; ++i) {	85 for (int i = 0; i < num_proc_channels_; ++i) {

(...skipping 18 matching lines...) Expand all Loading...
123 num_bands_));	104 num_bands_));

124 splitting_filter_.reset(new SplittingFilter(num_proc_channels_,	105 splitting_filter_.reset(new SplittingFilter(num_proc_channels_,

125 num_bands_,	106 num_bands_,

126 proc_num_frames_));	107 proc_num_frames_));

127 }	108 }

128 }	109 }

129	110

130 AudioBuffer::~AudioBuffer() {}	111 AudioBuffer::~AudioBuffer() {}

131	112

132 void AudioBuffer::CopyFrom(const float* const* data,	113 void AudioBuffer::CopyFrom(const float* const* data,

133 int num_frames,	114 const StreamConfig& stream_config) {

134 AudioProcessing::ChannelLayout layout) {	115 assert(stream_config.num_frames() == input_num_frames_);

135 assert(num_frames == input_num_frames_);	116 assert(stream_config.num_channels() == num_input_channels_);

136 assert(ChannelsFromLayout(layout) == num_input_channels_);

137 InitForNewData();	117 InitForNewData();

138 // Initialized lazily because there's a different condition in	118 // Initialized lazily because there's a different condition in

139 // DeinterleaveFrom.	119 // DeinterleaveFrom.

140 if ((num_input_channels_ == 2 && num_proc_channels_ == 1) && !input_buffer_) {	120 const bool need_to_downmix =

	121 num_input_channels_ > 1 && num_proc_channels_ == 1;

	122 if (need_to_downmix && !input_buffer_) {

141 input_buffer_.reset(	123 input_buffer_.reset(

142 new IFChannelBuffer(input_num_frames_, num_proc_channels_));	124 new IFChannelBuffer(input_num_frames_, num_proc_channels_));

143 }	125 }

144	126

145 if (HasKeyboardChannel(layout)) {	127 if (stream_config.has_keyboard()) {

146 keyboard_data_ = data[KeyboardChannelIndex(layout)];	128 keyboard_data_ = data[KeyboardChannelIndex(stream_config)];

147 }	129 }

148	130

149 // Downmix.	131 // Downmix.

150 const float* const* data_ptr = data;	132 const float* const* data_ptr = data;

151 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {	133 if (need_to_downmix) {

152 StereoToMono(data[0],	134 DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,

153 data[1],	135 input_buffer_->fbuf()->channels()[0]);

154 input_buffer_->fbuf()->channels()[0],

155 input_num_frames_);

156 data_ptr = input_buffer_->fbuf_const()->channels();	136 data_ptr = input_buffer_->fbuf_const()->channels();

157 }	137 }

158	138

159 // Resample.	139 // Resample.

160 if (input_num_frames_ != proc_num_frames_) {	140 if (input_num_frames_ != proc_num_frames_) {

161 for (int i = 0; i < num_proc_channels_; ++i) {	141 for (int i = 0; i < num_proc_channels_; ++i) {

162 input_resamplers_[i]->Resample(data_ptr[i],	142 input_resamplers_[i]->Resample(data_ptr[i],

163 input_num_frames_,	143 input_num_frames_,

164 process_buffer_->channels()[i],	144 process_buffer_->channels()[i],

165 proc_num_frames_);	145 proc_num_frames_);

166 }	146 }

167 data_ptr = process_buffer_->channels();	147 data_ptr = process_buffer_->channels();

168 }	148 }

169	149

170 // Convert to the S16 range.	150 // Convert to the S16 range.

171 for (int i = 0; i < num_proc_channels_; ++i) {	151 for (int i = 0; i < num_proc_channels_; ++i) {

172 FloatToFloatS16(data_ptr[i],	152 FloatToFloatS16(data_ptr[i],

173 proc_num_frames_,	153 proc_num_frames_,

174 data_->fbuf()->channels()[i]);	154 data_->fbuf()->channels()[i]);

175 }	155 }

176 }	156 }

177	157

178 void AudioBuffer::CopyTo(int num_frames,	158 void AudioBuffer::CopyTo(const StreamConfig& stream_config,

179 AudioProcessing::ChannelLayout layout,

180 float* const* data) {	159 float* const* data) {

181 assert(num_frames == output_num_frames_);	160 assert(stream_config.num_frames() == output_num_frames_);

182 assert(ChannelsFromLayout(layout) == num_channels_);	161 assert(stream_config.num_channels() == num_channels_);

183	162

184 // Convert to the float range.	163 // Convert to the float range.

185 float* const* data_ptr = data;	164 float* const* data_ptr = data;

186 if (output_num_frames_ != proc_num_frames_) {	165 if (output_num_frames_ != proc_num_frames_) {

187 // Convert to an intermediate buffer for subsequent resampling.	166 // Convert to an intermediate buffer for subsequent resampling.

188 data_ptr = process_buffer_->channels();	167 data_ptr = process_buffer_->channels();

189 }	168 }

190 for (int i = 0; i < num_channels_; ++i) {	169 for (int i = 0; i < num_channels_; ++i) {

191 FloatS16ToFloat(data_->fbuf()->channels()[i],	170 FloatS16ToFloat(data_->fbuf()->channels()[i],

192 proc_num_frames_,	171 proc_num_frames_,

(...skipping 127 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
320 ChannelBuffer<float>* AudioBuffer::split_data_f() {	299 ChannelBuffer<float>* AudioBuffer::split_data_f() {

321 mixed_low_pass_valid_ = false;	300 mixed_low_pass_valid_ = false;

322 return split_data_.get() ? split_data_->fbuf() : data_->fbuf();	301 return split_data_.get() ? split_data_->fbuf() : data_->fbuf();

323 }	302 }

324	303

325 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {	304 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {

326 return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();	305 return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();

327 }	306 }

328	307

329 const int16_t* AudioBuffer::mixed_low_pass_data() {	308 const int16_t* AudioBuffer::mixed_low_pass_data() {

330 // Currently only mixing stereo to mono is supported.

331 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);

332

333 if (num_proc_channels_ == 1) {	309 if (num_proc_channels_ == 1) {

334 return split_bands_const(0)[kBand0To8kHz];	310 return split_bands_const(0)[kBand0To8kHz];

335 }	311 }

336	312

337 if (!mixed_low_pass_valid_) {	313 if (!mixed_low_pass_valid_) {

338 if (!mixed_low_pass_channels_.get()) {	314 if (!mixed_low_pass_channels_.get()) {

339 mixed_low_pass_channels_.reset(	315 mixed_low_pass_channels_.reset(

340 new ChannelBuffer<int16_t>(num_split_frames_, 1));	316 new ChannelBuffer<int16_t>(num_split_frames_, 1));

341 }	317 }

342 StereoToMono(split_bands_const(0)[kBand0To8kHz],	318

343 split_bands_const(1)[kBand0To8kHz],	319 DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),

344 mixed_low_pass_channels_->channels()[0],	320 num_split_frames_, num_channels_,

345 num_split_frames_);	321 mixed_low_pass_channels_->channels()[0]);

346 mixed_low_pass_valid_ = true;	322 mixed_low_pass_valid_ = true;

347 }	323 }

348 return mixed_low_pass_channels_->channels()[0];	324 return mixed_low_pass_channels_->channels()[0];

349 }	325 }

350	326

351 const int16_t* AudioBuffer::low_pass_reference(int channel) const {	327 const int16_t* AudioBuffer::low_pass_reference(int channel) const {

352 if (!reference_copied_) {	328 if (!reference_copied_) {

353 return NULL;	329 return NULL;

354 }	330 }

355	331

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
404 new IFChannelBuffer(input_num_frames_, num_proc_channels_));	380 new IFChannelBuffer(input_num_frames_, num_proc_channels_));

405 }	381 }

406 activity_ = frame->vad_activity_;	382 activity_ = frame->vad_activity_;

407	383

408 int16_t* const* deinterleaved;	384 int16_t* const* deinterleaved;

409 if (input_num_frames_ == proc_num_frames_) {	385 if (input_num_frames_ == proc_num_frames_) {

410 deinterleaved = data_->ibuf()->channels();	386 deinterleaved = data_->ibuf()->channels();

411 } else {	387 } else {

412 deinterleaved = input_buffer_->ibuf()->channels();	388 deinterleaved = input_buffer_->ibuf()->channels();

413 }	389 }

414 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {	390 if (num_proc_channels_ == 1) {

415 // Downmix directly; no explicit deinterleaving needed.	391 // Downmix and deinterleave simultaneously.

416 for (int i = 0; i < input_num_frames_; ++i) {	392 DownmixInterleavedToMono(frame->data_, input_num_frames_,

417 deinterleaved[0][i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;	393 num_input_channels_, deinterleaved[0]);

418 }

419 } else {	394 } else {

420 assert(num_proc_channels_ == num_input_channels_);	395 assert(num_proc_channels_ == num_input_channels_);

421 Deinterleave(frame->data_,	396 Deinterleave(frame->data_,

422 input_num_frames_,	397 input_num_frames_,

423 num_proc_channels_,	398 num_proc_channels_,

424 deinterleaved);	399 deinterleaved);

425 }	400 }

426	401

427 // Resample.	402 // Resample.

428 if (input_num_frames_ != proc_num_frames_) {	403 if (input_num_frames_ != proc_num_frames_) {

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
470	445

471 void AudioBuffer::SplitIntoFrequencyBands() {	446 void AudioBuffer::SplitIntoFrequencyBands() {

472 splitting_filter_->Analysis(data_.get(), split_data_.get());	447 splitting_filter_->Analysis(data_.get(), split_data_.get());

473 }	448 }

474	449

475 void AudioBuffer::MergeFrequencyBands() {	450 void AudioBuffer::MergeFrequencyBands() {

476 splitting_filter_->Synthesis(split_data_.get(), data_.get());	451 splitting_filter_->Synthesis(split_data_.get(), data_.get());

477 }	452 }

478	453

479 } // namespace webrtc	454 } // namespace webrtc

OLD	NEW