OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/audio_buffer.h" | 11 #include "webrtc/modules/audio_processing/audio_buffer.h" |
12 | 12 |
13 #include "webrtc/common_audio/include/audio_util.h" | 13 #include "webrtc/common_audio/include/audio_util.h" |
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h" | 14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h" |
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar y.h" | 15 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar y.h" |
16 #include "webrtc/common_audio/channel_buffer.h" | 16 #include "webrtc/common_audio/channel_buffer.h" |
17 #include "webrtc/modules/audio_processing/common.h" | 17 #include "webrtc/modules/audio_processing/common.h" |
18 | 18 |
19 namespace webrtc { | 19 namespace webrtc { |
20 namespace { | 20 namespace { |
21 | 21 |
// Per-channel frame lengths, in samples, for the sample rates that receive
// special treatment in this file. The 32 kHz and 48 kHz lengths are exact
// multiples of the 16 kHz length, which is what allows such frames to be
// divided into a whole number of 16 kHz-sized bands.
const int kSamplesPer16kHzChannel = 160;
const int kSamplesPer32kHzChannel = 2 * kSamplesPer16kHzChannel;
const int kSamplesPer48kHzChannel = 3 * kSamplesPer16kHzChannel;
25 | 25 |
26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { | 26 int KeyboardChannelIndex(const StreamConfig& stream_config) { |
27 switch (layout) { | 27 if (!stream_config.has_keyboard()) { |
28 case AudioProcessing::kMono: | 28 assert(false); |
29 case AudioProcessing::kStereo: | 29 return -1; |
30 return false; | |
31 case AudioProcessing::kMonoAndKeyboard: | |
32 case AudioProcessing::kStereoAndKeyboard: | |
33 return true; | |
34 } | 30 } |
35 assert(false); | |
36 return false; | |
37 } | |
38 | 31 |
39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) { | 32 switch (stream_config.num_channels()) { |
Andrew MacDonald
2015/07/22 22:47:21
I see that this is identical to the previous behav
mgraczyk
2015/07/23 00:16:54
Changed to be generic.
I agree it makes sense for
Andrew MacDonald
2015/07/23 00:49:06
That's right.
| |
40 switch (layout) { | 33 case 1: |
41 case AudioProcessing::kMono: | |
42 case AudioProcessing::kStereo: | |
43 assert(false); | |
44 return -1; | |
45 case AudioProcessing::kMonoAndKeyboard: | |
46 return 1; | 34 return 1; |
47 case AudioProcessing::kStereoAndKeyboard: | 35 case 2: |
48 return 2; | 36 return 2; |
49 } | 37 } |
50 assert(false); | 38 assert(false); |
51 return -1; | 39 return -1; |
52 } | 40 } |
53 | 41 |
54 template <typename T> | |
55 void StereoToMono(const T* left, const T* right, T* out, | |
56 int num_frames) { | |
57 for (int i = 0; i < num_frames; ++i) | |
58 out[i] = (left[i] + right[i]) / 2; | |
59 } | |
60 | |
61 int NumBandsFromSamplesPerChannel(int num_frames) { | 42 int NumBandsFromSamplesPerChannel(int num_frames) { |
62 int num_bands = 1; | 43 int num_bands = 1; |
63 if (num_frames == kSamplesPer32kHzChannel || | 44 if (num_frames == kSamplesPer32kHzChannel || |
64 num_frames == kSamplesPer48kHzChannel) { | 45 num_frames == kSamplesPer48kHzChannel) { |
65 num_bands = rtc::CheckedDivExact(num_frames, | 46 num_bands = rtc::CheckedDivExact(num_frames, |
66 static_cast<int>(kSamplesPer16kHzChannel)); | 47 static_cast<int>(kSamplesPer16kHzChannel)); |
67 } | 48 } |
68 return num_bands; | 49 return num_bands; |
69 } | 50 } |
70 | 51 |
(...skipping 13 matching lines...) Expand all Loading... | |
84 num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)), | 65 num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)), |
85 num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)), | 66 num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)), |
86 mixed_low_pass_valid_(false), | 67 mixed_low_pass_valid_(false), |
87 reference_copied_(false), | 68 reference_copied_(false), |
88 activity_(AudioFrame::kVadUnknown), | 69 activity_(AudioFrame::kVadUnknown), |
89 keyboard_data_(NULL), | 70 keyboard_data_(NULL), |
90 data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) { | 71 data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) { |
91 assert(input_num_frames_ > 0); | 72 assert(input_num_frames_ > 0); |
92 assert(proc_num_frames_ > 0); | 73 assert(proc_num_frames_ > 0); |
93 assert(output_num_frames_ > 0); | 74 assert(output_num_frames_ > 0); |
94 assert(num_input_channels_ > 0 && num_input_channels_ <= 2); | 75 assert(num_input_channels_ > 0); |
95 assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_); | 76 assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_); |
96 | 77 |
97 if (input_num_frames_ != proc_num_frames_ || | 78 if (input_num_frames_ != proc_num_frames_ || |
98 output_num_frames_ != proc_num_frames_) { | 79 output_num_frames_ != proc_num_frames_) { |
99 // Create an intermediate buffer for resampling. | 80 // Create an intermediate buffer for resampling. |
100 process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_, | 81 process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_, |
101 num_proc_channels_)); | 82 num_proc_channels_)); |
102 | 83 |
103 if (input_num_frames_ != proc_num_frames_) { | 84 if (input_num_frames_ != proc_num_frames_) { |
104 for (int i = 0; i < num_proc_channels_; ++i) { | 85 for (int i = 0; i < num_proc_channels_; ++i) { |
(...skipping 18 matching lines...) Expand all Loading... | |
123 num_bands_)); | 104 num_bands_)); |
124 splitting_filter_.reset(new SplittingFilter(num_proc_channels_, | 105 splitting_filter_.reset(new SplittingFilter(num_proc_channels_, |
125 num_bands_, | 106 num_bands_, |
126 proc_num_frames_)); | 107 proc_num_frames_)); |
127 } | 108 } |
128 } | 109 } |
129 | 110 |
130 AudioBuffer::~AudioBuffer() {} | 111 AudioBuffer::~AudioBuffer() {} |
131 | 112 |
132 void AudioBuffer::CopyFrom(const float* const* data, | 113 void AudioBuffer::CopyFrom(const float* const* data, |
133 int num_frames, | 114 const StreamConfig& stream_config) { |
134 AudioProcessing::ChannelLayout layout) { | 115 assert(stream_config.num_frames() == input_num_frames_); |
135 assert(num_frames == input_num_frames_); | 116 assert(stream_config.num_channels() == num_input_channels_); |
136 assert(ChannelsFromLayout(layout) == num_input_channels_); | |
137 InitForNewData(); | 117 InitForNewData(); |
138 // Initialized lazily because there's a different condition in | 118 // Initialized lazily because there's a different condition in |
139 // DeinterleaveFrom. | 119 // DeinterleaveFrom. |
140 if ((num_input_channels_ == 2 && num_proc_channels_ == 1) && !input_buffer_) { | 120 const bool need_to_downmix = |
121 num_input_channels_ > 1 && num_proc_channels_ == 1; | |
122 if (need_to_downmix && !input_buffer_) { | |
141 input_buffer_.reset( | 123 input_buffer_.reset( |
142 new IFChannelBuffer(input_num_frames_, num_proc_channels_)); | 124 new IFChannelBuffer(input_num_frames_, num_proc_channels_)); |
143 } | 125 } |
144 | 126 |
145 if (HasKeyboardChannel(layout)) { | 127 if (stream_config.has_keyboard()) { |
146 keyboard_data_ = data[KeyboardChannelIndex(layout)]; | 128 keyboard_data_ = data[KeyboardChannelIndex(stream_config)]; |
147 } | 129 } |
148 | 130 |
149 // Downmix. | 131 // Downmix. |
150 const float* const* data_ptr = data; | 132 const float* const* data_ptr = data; |
151 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { | 133 if (need_to_downmix) { |
152 StereoToMono(data[0], | 134 DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_, |
153 data[1], | 135 input_buffer_->fbuf()->channels()[0]); |
154 input_buffer_->fbuf()->channels()[0], | |
155 input_num_frames_); | |
156 data_ptr = input_buffer_->fbuf_const()->channels(); | 136 data_ptr = input_buffer_->fbuf_const()->channels(); |
157 } | 137 } |
158 | 138 |
159 // Resample. | 139 // Resample. |
160 if (input_num_frames_ != proc_num_frames_) { | 140 if (input_num_frames_ != proc_num_frames_) { |
161 for (int i = 0; i < num_proc_channels_; ++i) { | 141 for (int i = 0; i < num_proc_channels_; ++i) { |
162 input_resamplers_[i]->Resample(data_ptr[i], | 142 input_resamplers_[i]->Resample(data_ptr[i], |
163 input_num_frames_, | 143 input_num_frames_, |
164 process_buffer_->channels()[i], | 144 process_buffer_->channels()[i], |
165 proc_num_frames_); | 145 proc_num_frames_); |
166 } | 146 } |
167 data_ptr = process_buffer_->channels(); | 147 data_ptr = process_buffer_->channels(); |
168 } | 148 } |
169 | 149 |
170 // Convert to the S16 range. | 150 // Convert to the S16 range. |
171 for (int i = 0; i < num_proc_channels_; ++i) { | 151 for (int i = 0; i < num_proc_channels_; ++i) { |
172 FloatToFloatS16(data_ptr[i], | 152 FloatToFloatS16(data_ptr[i], |
173 proc_num_frames_, | 153 proc_num_frames_, |
174 data_->fbuf()->channels()[i]); | 154 data_->fbuf()->channels()[i]); |
175 } | 155 } |
176 } | 156 } |
177 | 157 |
178 void AudioBuffer::CopyTo(int num_frames, | 158 void AudioBuffer::CopyTo(const StreamConfig& stream_config, |
179 AudioProcessing::ChannelLayout layout, | |
180 float* const* data) { | 159 float* const* data) { |
181 assert(num_frames == output_num_frames_); | 160 assert(stream_config.num_frames() == output_num_frames_); |
182 assert(ChannelsFromLayout(layout) == num_channels_); | 161 assert(stream_config.num_channels() == num_channels_); |
183 | 162 |
184 // Convert to the float range. | 163 // Convert to the float range. |
185 float* const* data_ptr = data; | 164 float* const* data_ptr = data; |
186 if (output_num_frames_ != proc_num_frames_) { | 165 if (output_num_frames_ != proc_num_frames_) { |
187 // Convert to an intermediate buffer for subsequent resampling. | 166 // Convert to an intermediate buffer for subsequent resampling. |
188 data_ptr = process_buffer_->channels(); | 167 data_ptr = process_buffer_->channels(); |
189 } | 168 } |
190 for (int i = 0; i < num_channels_; ++i) { | 169 for (int i = 0; i < num_channels_; ++i) { |
191 FloatS16ToFloat(data_->fbuf()->channels()[i], | 170 FloatS16ToFloat(data_->fbuf()->channels()[i], |
192 proc_num_frames_, | 171 proc_num_frames_, |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
320 ChannelBuffer<float>* AudioBuffer::split_data_f() { | 299 ChannelBuffer<float>* AudioBuffer::split_data_f() { |
321 mixed_low_pass_valid_ = false; | 300 mixed_low_pass_valid_ = false; |
322 return split_data_.get() ? split_data_->fbuf() : data_->fbuf(); | 301 return split_data_.get() ? split_data_->fbuf() : data_->fbuf(); |
323 } | 302 } |
324 | 303 |
325 const ChannelBuffer<float>* AudioBuffer::split_data_f() const { | 304 const ChannelBuffer<float>* AudioBuffer::split_data_f() const { |
326 return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const(); | 305 return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const(); |
327 } | 306 } |
328 | 307 |
329 const int16_t* AudioBuffer::mixed_low_pass_data() { | 308 const int16_t* AudioBuffer::mixed_low_pass_data() { |
330 // Currently only mixing stereo to mono is supported. | |
331 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); | |
332 | |
333 if (num_proc_channels_ == 1) { | 309 if (num_proc_channels_ == 1) { |
334 return split_bands_const(0)[kBand0To8kHz]; | 310 return split_bands_const(0)[kBand0To8kHz]; |
335 } | 311 } |
336 | 312 |
337 if (!mixed_low_pass_valid_) { | 313 if (!mixed_low_pass_valid_) { |
338 if (!mixed_low_pass_channels_.get()) { | 314 if (!mixed_low_pass_channels_.get()) { |
339 mixed_low_pass_channels_.reset( | 315 mixed_low_pass_channels_.reset( |
340 new ChannelBuffer<int16_t>(num_split_frames_, 1)); | 316 new ChannelBuffer<int16_t>(num_split_frames_, 1)); |
341 } | 317 } |
342 StereoToMono(split_bands_const(0)[kBand0To8kHz], | 318 |
343 split_bands_const(1)[kBand0To8kHz], | 319 DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz), |
344 mixed_low_pass_channels_->channels()[0], | 320 num_split_frames_, num_channels_, |
345 num_split_frames_); | 321 mixed_low_pass_channels_->channels()[0]); |
346 mixed_low_pass_valid_ = true; | 322 mixed_low_pass_valid_ = true; |
347 } | 323 } |
348 return mixed_low_pass_channels_->channels()[0]; | 324 return mixed_low_pass_channels_->channels()[0]; |
349 } | 325 } |
350 | 326 |
351 const int16_t* AudioBuffer::low_pass_reference(int channel) const { | 327 const int16_t* AudioBuffer::low_pass_reference(int channel) const { |
352 if (!reference_copied_) { | 328 if (!reference_copied_) { |
353 return NULL; | 329 return NULL; |
354 } | 330 } |
355 | 331 |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
404 new IFChannelBuffer(input_num_frames_, num_proc_channels_)); | 380 new IFChannelBuffer(input_num_frames_, num_proc_channels_)); |
405 } | 381 } |
406 activity_ = frame->vad_activity_; | 382 activity_ = frame->vad_activity_; |
407 | 383 |
408 int16_t* const* deinterleaved; | 384 int16_t* const* deinterleaved; |
409 if (input_num_frames_ == proc_num_frames_) { | 385 if (input_num_frames_ == proc_num_frames_) { |
410 deinterleaved = data_->ibuf()->channels(); | 386 deinterleaved = data_->ibuf()->channels(); |
411 } else { | 387 } else { |
412 deinterleaved = input_buffer_->ibuf()->channels(); | 388 deinterleaved = input_buffer_->ibuf()->channels(); |
413 } | 389 } |
414 if (num_input_channels_ == 2 && num_proc_channels_ == 1) { | 390 if (num_proc_channels_ == 1) { |
415 // Downmix directly; no explicit deinterleaving needed. | 391 // Downmix and deinterleave simultaneously. |
416 for (int i = 0; i < input_num_frames_; ++i) { | 392 DownmixInterleavedToMono(frame->data_, input_num_frames_, |
417 deinterleaved[0][i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2; | 393 num_input_channels_, deinterleaved[0]); |
418 } | |
419 } else { | 394 } else { |
420 assert(num_proc_channels_ == num_input_channels_); | 395 assert(num_proc_channels_ == num_input_channels_); |
421 Deinterleave(frame->data_, | 396 Deinterleave(frame->data_, |
422 input_num_frames_, | 397 input_num_frames_, |
423 num_proc_channels_, | 398 num_proc_channels_, |
424 deinterleaved); | 399 deinterleaved); |
425 } | 400 } |
426 | 401 |
427 // Resample. | 402 // Resample. |
428 if (input_num_frames_ != proc_num_frames_) { | 403 if (input_num_frames_ != proc_num_frames_) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
470 | 445 |
471 void AudioBuffer::SplitIntoFrequencyBands() { | 446 void AudioBuffer::SplitIntoFrequencyBands() { |
472 splitting_filter_->Analysis(data_.get(), split_data_.get()); | 447 splitting_filter_->Analysis(data_.get(), split_data_.get()); |
473 } | 448 } |
474 | 449 |
475 void AudioBuffer::MergeFrequencyBands() { | 450 void AudioBuffer::MergeFrequencyBands() { |
476 splitting_filter_->Synthesis(split_data_.get(), data_.get()); | 451 splitting_filter_->Synthesis(split_data_.get(), data_.get()); |
477 } | 452 } |
478 | 453 |
479 } // namespace webrtc | 454 } // namespace webrtc |
OLD | NEW |