OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 14 matching lines...) Expand all Loading... |
25 | 25 |
26 } // namespace | 26 } // namespace |
27 | 27 |
28 void AudioFrameOperations::Add(const AudioFrame& frame_to_add, | 28 void AudioFrameOperations::Add(const AudioFrame& frame_to_add, |
29 AudioFrame* result_frame) { | 29 AudioFrame* result_frame) { |
30 // Sanity check. | 30 // Sanity check. |
31 RTC_DCHECK(result_frame); | 31 RTC_DCHECK(result_frame); |
32 RTC_DCHECK_GT(result_frame->num_channels_, 0); | 32 RTC_DCHECK_GT(result_frame->num_channels_, 0); |
33 RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); | 33 RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); |
34 | 34 |
35 bool no_previous_data = false; | 35 bool no_previous_data = result_frame->muted(); |
36 if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { | 36 if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { |
37 // Special case we have no data to start with. | 37 // Special case we have no data to start with. |
38 RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); | 38 RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); |
39 result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_; | 39 result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_; |
40 no_previous_data = true; | 40 no_previous_data = true; |
41 } | 41 } |
42 | 42 |
43 if (result_frame->vad_activity_ == AudioFrame::kVadActive || | 43 if (result_frame->vad_activity_ == AudioFrame::kVadActive || |
44 frame_to_add.vad_activity_ == AudioFrame::kVadActive) { | 44 frame_to_add.vad_activity_ == AudioFrame::kVadActive) { |
45 result_frame->vad_activity_ = AudioFrame::kVadActive; | 45 result_frame->vad_activity_ = AudioFrame::kVadActive; |
46 } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown || | 46 } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown || |
47 frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) { | 47 frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) { |
48 result_frame->vad_activity_ = AudioFrame::kVadUnknown; | 48 result_frame->vad_activity_ = AudioFrame::kVadUnknown; |
49 } | 49 } |
50 | 50 |
51 if (result_frame->speech_type_ != frame_to_add.speech_type_) | 51 if (result_frame->speech_type_ != frame_to_add.speech_type_) |
52 result_frame->speech_type_ = AudioFrame::kUndefined; | 52 result_frame->speech_type_ = AudioFrame::kUndefined; |
53 | 53 |
54 if (no_previous_data) { | 54 if (!frame_to_add.muted()) { |
55 std::copy(frame_to_add.data_, frame_to_add.data_ + | 55 const int16_t* in_data = frame_to_add.data(); |
56 frame_to_add.samples_per_channel_ * | 56 int16_t* out_data = result_frame->mutable_data(); |
57 result_frame->num_channels_, | 57 size_t length = |
58 result_frame->data_); | 58 frame_to_add.samples_per_channel_ * frame_to_add.num_channels_; |
59 } else { | 59 if (no_previous_data) { |
60 for (size_t i = 0; | 60 std::copy(in_data, in_data + length, out_data); |
61 i < result_frame->samples_per_channel_ * result_frame->num_channels_; | 61 } else { |
62 i++) { | 62 for (size_t i = 0; i < length; i++) { |
63 const int32_t wrap_guard = static_cast<int32_t>(result_frame->data_[i]) + | 63 const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) + |
64 static_cast<int32_t>(frame_to_add.data_[i]); | 64 static_cast<int32_t>(in_data[i]); |
65 result_frame->data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); | 65 out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard); |
| 66 } |
66 } | 67 } |
67 } | 68 } |
68 return; | |
69 } | 69 } |
70 | 70 |
71 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio, | 71 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio, |
72 size_t samples_per_channel, | 72 size_t samples_per_channel, |
73 int16_t* dst_audio) { | 73 int16_t* dst_audio) { |
74 for (size_t i = 0; i < samples_per_channel; i++) { | 74 for (size_t i = 0; i < samples_per_channel; i++) { |
75 dst_audio[2 * i] = src_audio[i]; | 75 dst_audio[2 * i] = src_audio[i]; |
76 dst_audio[2 * i + 1] = src_audio[i]; | 76 dst_audio[2 * i + 1] = src_audio[i]; |
77 } | 77 } |
78 } | 78 } |
79 | 79 |
80 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { | 80 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { |
81 if (frame->num_channels_ != 1) { | 81 if (frame->num_channels_ != 1) { |
82 return -1; | 82 return -1; |
83 } | 83 } |
84 if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) { | 84 if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) { |
85 // Not enough memory to expand from mono to stereo. | 85 // Not enough memory to expand from mono to stereo. |
86 return -1; | 86 return -1; |
87 } | 87 } |
88 | 88 |
89 int16_t data_copy[AudioFrame::kMaxDataSizeSamples]; | 89 if (!frame->muted()) { |
90 memcpy(data_copy, frame->data_, | 90 // TODO(yujo): this operation can be done in place. |
91 sizeof(int16_t) * frame->samples_per_channel_); | 91 int16_t data_copy[AudioFrame::kMaxDataSizeSamples]; |
92 MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_); | 92 memcpy(data_copy, frame->data(), |
| 93 sizeof(int16_t) * frame->samples_per_channel_); |
| 94 MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data()); |
| 95 } |
93 frame->num_channels_ = 2; | 96 frame->num_channels_ = 2; |
94 | 97 |
95 return 0; | 98 return 0; |
96 } | 99 } |
97 | 100 |
98 void AudioFrameOperations::StereoToMono(const int16_t* src_audio, | 101 void AudioFrameOperations::StereoToMono(const int16_t* src_audio, |
99 size_t samples_per_channel, | 102 size_t samples_per_channel, |
100 int16_t* dst_audio) { | 103 int16_t* dst_audio) { |
101 for (size_t i = 0; i < samples_per_channel; i++) { | 104 for (size_t i = 0; i < samples_per_channel; i++) { |
102 dst_audio[i] = | 105 dst_audio[i] = |
103 (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1; | 106 (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1; |
104 } | 107 } |
105 } | 108 } |
106 | 109 |
107 int AudioFrameOperations::StereoToMono(AudioFrame* frame) { | 110 int AudioFrameOperations::StereoToMono(AudioFrame* frame) { |
108 if (frame->num_channels_ != 2) { | 111 if (frame->num_channels_ != 2) { |
109 return -1; | 112 return -1; |
110 } | 113 } |
111 | 114 |
112 RTC_DCHECK_LE(frame->samples_per_channel_ * 2, | 115 RTC_DCHECK_LE(frame->samples_per_channel_ * 2, |
113 AudioFrame::kMaxDataSizeSamples); | 116 AudioFrame::kMaxDataSizeSamples); |
114 | 117 |
115 StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_); | 118 if (!frame->muted()) { |
| 119 StereoToMono(frame->data(), frame->samples_per_channel_, |
| 120 frame->mutable_data()); |
| 121 } |
116 frame->num_channels_ = 1; | 122 frame->num_channels_ = 1; |
117 | 123 |
118 return 0; | 124 return 0; |
119 } | 125 } |
120 | 126 |
121 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, | 127 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, |
122 size_t samples_per_channel, | 128 size_t samples_per_channel, |
123 int16_t* dst_audio) { | 129 int16_t* dst_audio) { |
124 for (size_t i = 0; i < samples_per_channel; i++) { | 130 for (size_t i = 0; i < samples_per_channel; i++) { |
125 dst_audio[i * 2] = | 131 dst_audio[i * 2] = |
126 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; | 132 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; |
127 dst_audio[i * 2 + 1] = | 133 dst_audio[i * 2 + 1] = |
128 (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> | 134 (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> |
129 1; | 135 1; |
130 } | 136 } |
131 } | 137 } |
132 | 138 |
133 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { | 139 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { |
134 if (frame->num_channels_ != 4) { | 140 if (frame->num_channels_ != 4) { |
135 return -1; | 141 return -1; |
136 } | 142 } |
137 | 143 |
138 RTC_DCHECK_LE(frame->samples_per_channel_ * 4, | 144 RTC_DCHECK_LE(frame->samples_per_channel_ * 4, |
139 AudioFrame::kMaxDataSizeSamples); | 145 AudioFrame::kMaxDataSizeSamples); |
140 | 146 |
141 QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_); | 147 if (!frame->muted()) { |
| 148 QuadToStereo(frame->data(), frame->samples_per_channel_, |
| 149 frame->mutable_data()); |
| 150 } |
142 frame->num_channels_ = 2; | 151 frame->num_channels_ = 2; |
143 | 152 |
144 return 0; | 153 return 0; |
145 } | 154 } |
146 | 155 |
147 void AudioFrameOperations::QuadToMono(const int16_t* src_audio, | 156 void AudioFrameOperations::QuadToMono(const int16_t* src_audio, |
148 size_t samples_per_channel, | 157 size_t samples_per_channel, |
149 int16_t* dst_audio) { | 158 int16_t* dst_audio) { |
150 for (size_t i = 0; i < samples_per_channel; i++) { | 159 for (size_t i = 0; i < samples_per_channel; i++) { |
151 dst_audio[i] = | 160 dst_audio[i] = |
152 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] + | 161 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] + |
153 src_audio[4 * i + 2] + src_audio[4 * i + 3]) >> 2; | 162 src_audio[4 * i + 2] + src_audio[4 * i + 3]) >> 2; |
154 } | 163 } |
155 } | 164 } |
156 | 165 |
157 int AudioFrameOperations::QuadToMono(AudioFrame* frame) { | 166 int AudioFrameOperations::QuadToMono(AudioFrame* frame) { |
158 if (frame->num_channels_ != 4) { | 167 if (frame->num_channels_ != 4) { |
159 return -1; | 168 return -1; |
160 } | 169 } |
161 | 170 |
162 RTC_DCHECK_LE(frame->samples_per_channel_ * 4, | 171 RTC_DCHECK_LE(frame->samples_per_channel_ * 4, |
163 AudioFrame::kMaxDataSizeSamples); | 172 AudioFrame::kMaxDataSizeSamples); |
164 | 173 |
165 QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_); | 174 if (!frame->muted()) { |
| 175 QuadToMono(frame->data(), frame->samples_per_channel_, |
| 176 frame->mutable_data()); |
| 177 } |
166 frame->num_channels_ = 1; | 178 frame->num_channels_ = 1; |
167 | 179 |
168 return 0; | 180 return 0; |
169 } | 181 } |
170 | 182 |
171 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, | 183 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, |
172 size_t src_channels, | 184 size_t src_channels, |
173 size_t samples_per_channel, | 185 size_t samples_per_channel, |
174 size_t dst_channels, | 186 size_t dst_channels, |
175 int16_t* dst_audio) { | 187 int16_t* dst_audio) { |
(...skipping 20 matching lines...) Expand all Loading... |
196 return QuadToStereo(frame); | 208 return QuadToStereo(frame); |
197 } else if (frame->num_channels_ == 4 && dst_channels == 1) { | 209 } else if (frame->num_channels_ == 4 && dst_channels == 1) { |
198 return QuadToMono(frame); | 210 return QuadToMono(frame); |
199 } | 211 } |
200 | 212 |
201 return -1; | 213 return -1; |
202 } | 214 } |
203 | 215 |
204 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { | 216 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { |
205 RTC_DCHECK(frame); | 217 RTC_DCHECK(frame); |
206 if (frame->num_channels_ != 2) { | 218 if (frame->num_channels_ != 2 || frame->muted()) { |
207 return; | 219 return; |
208 } | 220 } |
209 | 221 |
| 222 int16_t* frame_data = frame->mutable_data(); |
210 for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { | 223 for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { |
211 int16_t temp_data = frame->data_[i]; | 224 int16_t temp_data = frame_data[i]; |
212 frame->data_[i] = frame->data_[i + 1]; | 225 frame_data[i] = frame_data[i + 1]; |
213 frame->data_[i + 1] = temp_data; | 226 frame_data[i + 1] = temp_data; |
214 } | 227 } |
215 } | 228 } |
216 | 229 |
217 void AudioFrameOperations::Mute(AudioFrame* frame, | 230 void AudioFrameOperations::Mute(AudioFrame* frame, |
218 bool previous_frame_muted, | 231 bool previous_frame_muted, |
219 bool current_frame_muted) { | 232 bool current_frame_muted) { |
220 RTC_DCHECK(frame); | 233 RTC_DCHECK(frame); |
221 if (!previous_frame_muted && !current_frame_muted) { | 234 if (!previous_frame_muted && !current_frame_muted) { |
222 // Not muted, don't touch. | 235 // Not muted, don't touch. |
223 } else if (previous_frame_muted && current_frame_muted) { | 236 } else if (previous_frame_muted && current_frame_muted) { |
224 // Frame fully muted. | 237 // Frame fully muted. |
225 size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; | 238 size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; |
226 RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); | 239 RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); |
227 memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples); | 240 frame->Mute(); |
228 } else { | 241 } else { |
| 242 // Fade is a no-op on a muted frame. |
| 243 if (frame->muted()) { |
| 244 return; |
| 245 } |
| 246 |
229 // Limit number of samples to fade, if frame isn't long enough. | 247 // Limit number of samples to fade, if frame isn't long enough. |
230 size_t count = kMuteFadeFrames; | 248 size_t count = kMuteFadeFrames; |
231 float inc = kMuteFadeInc; | 249 float inc = kMuteFadeInc; |
232 if (frame->samples_per_channel_ < kMuteFadeFrames) { | 250 if (frame->samples_per_channel_ < kMuteFadeFrames) { |
233 count = frame->samples_per_channel_; | 251 count = frame->samples_per_channel_; |
234 if (count > 0) { | 252 if (count > 0) { |
235 inc = 1.0f / count; | 253 inc = 1.0f / count; |
236 } | 254 } |
237 } | 255 } |
238 | 256 |
239 size_t start = 0; | 257 size_t start = 0; |
240 size_t end = count; | 258 size_t end = count; |
241 float start_g = 0.0f; | 259 float start_g = 0.0f; |
242 if (current_frame_muted) { | 260 if (current_frame_muted) { |
243 // Fade out the last |count| samples of frame. | 261 // Fade out the last |count| samples of frame. |
244 RTC_DCHECK(!previous_frame_muted); | 262 RTC_DCHECK(!previous_frame_muted); |
245 start = frame->samples_per_channel_ - count; | 263 start = frame->samples_per_channel_ - count; |
246 end = frame->samples_per_channel_; | 264 end = frame->samples_per_channel_; |
247 start_g = 1.0f; | 265 start_g = 1.0f; |
248 inc = -inc; | 266 inc = -inc; |
249 } else { | 267 } else { |
250 // Fade in the first |count| samples of frame. | 268 // Fade in the first |count| samples of frame. |
251 RTC_DCHECK(previous_frame_muted); | 269 RTC_DCHECK(previous_frame_muted); |
252 } | 270 } |
253 | 271 |
254 // Perform fade. | 272 // Perform fade. |
| 273 int16_t* frame_data = frame->mutable_data(); |
255 size_t channels = frame->num_channels_; | 274 size_t channels = frame->num_channels_; |
256 for (size_t j = 0; j < channels; ++j) { | 275 for (size_t j = 0; j < channels; ++j) { |
257 float g = start_g; | 276 float g = start_g; |
258 for (size_t i = start * channels; i < end * channels; i += channels) { | 277 for (size_t i = start * channels; i < end * channels; i += channels) { |
259 g += inc; | 278 g += inc; |
260 frame->data_[i + j] *= g; | 279 frame_data[i + j] *= g; |
261 } | 280 } |
262 } | 281 } |
263 } | 282 } |
264 } | 283 } |
265 | 284 |
266 void AudioFrameOperations::Mute(AudioFrame* frame) { | 285 void AudioFrameOperations::Mute(AudioFrame* frame) { |
267 Mute(frame, true, true); | 286 Mute(frame, true, true); |
268 } | 287 } |
269 | 288 |
270 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { | 289 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { |
271 RTC_DCHECK(frame); | 290 RTC_DCHECK(frame); |
272 RTC_DCHECK_GT(frame->num_channels_, 0); | 291 RTC_DCHECK_GT(frame->num_channels_, 0); |
273 if (frame->num_channels_ < 1) { | 292 if (frame->num_channels_ < 1 || frame->muted()) { |
274 return; | 293 return; |
275 } | 294 } |
276 | 295 |
| 296 int16_t* frame_data = frame->mutable_data(); |
277 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; | 297 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
278 i++) { | 298 i++) { |
279 frame->data_[i] = frame->data_[i] >> 1; | 299 frame_data[i] = frame_data[i] >> 1; |
280 } | 300 } |
281 } | 301 } |
282 | 302 |
283 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { | 303 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { |
284 if (frame->num_channels_ != 2) { | 304 if (frame->num_channels_ != 2) { |
285 return -1; | 305 return -1; |
| 306 } else if (frame->muted()) { |
| 307 return 0; |
286 } | 308 } |
287 | 309 |
| 310 int16_t* frame_data = frame->mutable_data(); |
288 for (size_t i = 0; i < frame->samples_per_channel_; i++) { | 311 for (size_t i = 0; i < frame->samples_per_channel_; i++) { |
289 frame->data_[2 * i] = static_cast<int16_t>(left * frame->data_[2 * i]); | 312 frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]); |
290 frame->data_[2 * i + 1] = | 313 frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]); |
291 static_cast<int16_t>(right * frame->data_[2 * i + 1]); | |
292 } | 314 } |
293 return 0; | 315 return 0; |
294 } | 316 } |
295 | 317 |
296 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { | 318 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { |
297 int32_t temp_data = 0; | 319 if (frame->muted()) { |
| 320 return 0; |
| 321 } |
298 | 322 |
299 // Ensure that the output result is saturated [-32768, +32767]. | 323 int16_t* frame_data = frame->mutable_data(); |
300 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; | 324 for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
301 i++) { | 325 i++) { |
302 temp_data = static_cast<int32_t>(scale * frame->data_[i]); | 326 frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]); |
303 if (temp_data < -32768) { | |
304 frame->data_[i] = -32768; | |
305 } else if (temp_data > 32767) { | |
306 frame->data_[i] = 32767; | |
307 } else { | |
308 frame->data_[i] = static_cast<int16_t>(temp_data); | |
309 } | |
310 } | 327 } |
311 return 0; | 328 return 0; |
312 } | 329 } |
313 } // namespace webrtc | 330 } // namespace webrtc |
OLD | NEW |