/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/acm2/audio_coding_module_impl.h"

#include <assert.h>
#include <stdlib.h>
#include <vector>

#include "webrtc/base/checks.h"
#include "webrtc/base/safe_conversions.h"
#include "webrtc/engine_configurations.h"
#include "webrtc/modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "webrtc/modules/audio_coding/acm2/acm_common_defs.h"
#include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
#include "webrtc/modules/audio_coding/acm2/call_statistics.h"
#include "webrtc/system_wrappers/include/logging.h"
#include "webrtc/system_wrappers/include/metrics.h"
#include "webrtc/system_wrappers/include/rw_lock_wrapper.h"
#include "webrtc/system_wrappers/include/trace.h"
#include "webrtc/typedefs.h"

namespace webrtc {

namespace {

// Adds a codec usage sample to the histogram.
void UpdateCodecTypeHistogram(size_t codec_type) {
  RTC_HISTOGRAM_ENUMERATION(
      "WebRTC.Audio.Encoder.CodecType", static_cast<int>(codec_type),
      static_cast<int>(
          webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
}

}  // namespace

namespace acm2 {

struct EncoderFactory {
  AudioEncoder* external_speech_encoder = nullptr;
  CodecManager codec_manager;
  RentACodec rent_a_codec;
};

namespace {

// TODO(turajs): The same functionality is used in NetEq. If both classes
// need them, make them static functions in ACMCodecDB.
bool IsCodecRED(const CodecInst& codec) {
  return (STR_CASE_CMP(codec.plname, "RED") == 0);
}

bool IsCodecCN(const CodecInst& codec) {
  return (STR_CASE_CMP(codec.plname, "CN") == 0);
}

// Stereo-to-mono down-mixing; can be done in-place.
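// Each output sample is the average of one interleaved left/right input pair
// (L0, R0, L1, R1, ...); the sum is formed in int precision, so it cannot
// overflow, and the right-shift by one halves it.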
int DownMix(const AudioFrame& frame,
            size_t length_out_buff,
            int16_t* out_buff) {
  if (length_out_buff < frame.samples_per_channel_) {
    return -1;
  }
  for (size_t n = 0; n < frame.samples_per_channel_; ++n)
    out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1;
  return 0;
}

// Mono-to-stereo up-mixing; can be done in-place.
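// When used in-place, the copy loop must run backwards: out_buff[2 * i] and
// out_buff[2 * i + 1] are written only after frame.data_[i] has been read.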
int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
  if (length_out_buff < 2 * frame.samples_per_channel_) {
    return -1;
  }
  for (size_t n = frame.samples_per_channel_; n != 0; --n) {
    size_t i = n - 1;
    int16_t sample = frame.data_[i];
    out_buff[2 * i + 1] = sample;
    out_buff[2 * i] = sample;
  }
  return 0;
}

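// Translates the redundant (RED) blocks described by |info| into the
// RTPFragmentationHeader representation: one offset/length/timestamp-delta/
// payload-type entry per redundant block, laid out back to back in the
// encoded buffer.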
void ConvertEncodedInfoToFragmentationHeader(
    const AudioEncoder::EncodedInfo& info,
    RTPFragmentationHeader* frag) {
  if (info.redundant.empty()) {
    frag->fragmentationVectorSize = 0;
    return;
  }

  frag->VerifyAndAllocateFragmentationHeader(
      static_cast<uint16_t>(info.redundant.size()));
  frag->fragmentationVectorSize = static_cast<uint16_t>(info.redundant.size());
  size_t offset = 0;
  for (size_t i = 0; i < info.redundant.size(); ++i) {
    frag->fragmentationOffset[i] = offset;
    offset += info.redundant[i].encoded_bytes;
    frag->fragmentationLength[i] = info.redundant[i].encoded_bytes;
    frag->fragmentationTimeDiff[i] = rtc::checked_cast<uint16_t>(
        info.encoded_timestamp - info.redundant[i].encoded_timestamp);
    frag->fragmentationPlType[i] = info.redundant[i].payload_type;
  }
}

// Wraps a raw AudioEncoder pointer. The idea is that you can put one of these
// in a unique_ptr, to protect the contained raw pointer from being deleted
// when the unique_ptr expires. (This is of course a bad idea in general, but
// it is needed for backwards compatibility.)
class RawAudioEncoderWrapper final : public AudioEncoder {
 public:
  explicit RawAudioEncoderWrapper(AudioEncoder* enc) : enc_(enc) {}
  int SampleRateHz() const override { return enc_->SampleRateHz(); }
  size_t NumChannels() const override { return enc_->NumChannels(); }
  int RtpTimestampRateHz() const override { return enc_->RtpTimestampRateHz(); }
  size_t Num10MsFramesInNextPacket() const override {
    return enc_->Num10MsFramesInNextPacket();
  }
  size_t Max10MsFramesInAPacket() const override {
    return enc_->Max10MsFramesInAPacket();
  }
  int GetTargetBitrate() const override { return enc_->GetTargetBitrate(); }
  EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
                         rtc::ArrayView<const int16_t> audio,
                         rtc::Buffer* encoded) override {
    return enc_->Encode(rtp_timestamp, audio, encoded);
  }
  void Reset() override { return enc_->Reset(); }
  bool SetFec(bool enable) override { return enc_->SetFec(enable); }
  bool SetDtx(bool enable) override { return enc_->SetDtx(enable); }
  bool SetApplication(Application application) override {
    return enc_->SetApplication(application);
  }
  void SetMaxPlaybackRate(int frequency_hz) override {
    return enc_->SetMaxPlaybackRate(frequency_hz);
  }
  void SetProjectedPacketLossRate(double fraction) override {
    return enc_->SetProjectedPacketLossRate(fraction);
  }
  void SetTargetBitrate(int target_bps) override {
    return enc_->SetTargetBitrate(target_bps);
  }

 private:
  AudioEncoder* enc_;
};

// Return false on error.
bool CreateSpeechEncoderIfNecessary(EncoderFactory* ef) {
  auto* sp = ef->codec_manager.GetStackParams();
  if (sp->speech_encoder) {
    // Do nothing; we already have a speech encoder.
  } else if (ef->codec_manager.GetCodecInst()) {
    RTC_DCHECK(!ef->external_speech_encoder);
    // We have no speech encoder, but we have a specification for making one.
    std::unique_ptr<AudioEncoder> enc =
        ef->rent_a_codec.RentEncoder(*ef->codec_manager.GetCodecInst());
    if (!enc)
      return false;  // Encoder spec was bad.
    sp->speech_encoder = std::move(enc);
  } else if (ef->external_speech_encoder) {
    RTC_DCHECK(!ef->codec_manager.GetCodecInst());
    // We have an external speech encoder.
    sp->speech_encoder = std::unique_ptr<AudioEncoder>(
        new RawAudioEncoderWrapper(ef->external_speech_encoder));
  }
  return true;
}

}  // namespace

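// Logs |value| to a sparse histogram, but only when it differs from the
// previously logged value, so that an unchanged target bitrate does not flood
// the metrics with identical samples.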
void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
  if (value != last_value_ || first_time_) {
    first_time_ = false;
    last_value_ = value;
    RTC_HISTOGRAM_COUNTS_SPARSE_100(histogram_name_, value);
  }
}

AudioCodingModuleImpl::AudioCodingModuleImpl(
    const AudioCodingModule::Config& config)
    : id_(config.id),
      expected_codec_ts_(0xD87F3F9F),
      expected_in_ts_(0xD87F3F9F),
      receiver_(config),
      bitrate_logger_("WebRTC.Audio.TargetBitrateInKbps"),
      encoder_factory_(new EncoderFactory),
      encoder_stack_(nullptr),
      previous_pltype_(255),
      receiver_initialized_(false),
      first_10ms_data_(false),
      first_frame_(true),
      packetization_callback_(nullptr),
      vad_callback_(nullptr),
      codec_histogram_bins_log_(),
      number_of_consecutive_empty_packets_(0) {
  if (InitializeReceiverSafe() < 0) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot initialize receiver");
  }
  WEBRTC_TRACE(webrtc::kTraceMemory, webrtc::kTraceAudioCoding, id_, "Created");
}

AudioCodingModuleImpl::~AudioCodingModuleImpl() = default;

int32_t AudioCodingModuleImpl::Encode(const InputData& input_data) {
  AudioEncoder::EncodedInfo encoded_info;
  uint8_t previous_pltype;

  // Check that we have a valid encoder before proceeding.
  if (!HaveValidEncoder("Process"))
    return -1;

  // Scale the timestamp to the codec's RTP timestamp rate.
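  // The scale factor is SampleRateHz() / RtpTimestampRateHz(). For most
  // codecs it is 1, but e.g. G.722 samples at 16 kHz while its RTP clock runs
  // at 8 kHz, so its RTP timestamp advances at half the sample rate.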
  uint32_t rtp_timestamp =
      first_frame_ ? input_data.input_timestamp
                   : last_rtp_timestamp_ +
                         rtc::CheckedDivExact(
                             input_data.input_timestamp - last_timestamp_,
                             static_cast<uint32_t>(rtc::CheckedDivExact(
                                 encoder_stack_->SampleRateHz(),
                                 encoder_stack_->RtpTimestampRateHz())));
  last_timestamp_ = input_data.input_timestamp;
  last_rtp_timestamp_ = rtp_timestamp;
  first_frame_ = false;

  // Clear the buffer before reuse - encoded data will get appended.
  encode_buffer_.Clear();
  encoded_info = encoder_stack_->Encode(
      rtp_timestamp, rtc::ArrayView<const int16_t>(
                         input_data.audio, input_data.audio_channel *
                                               input_data.length_per_channel),
      &encode_buffer_);

  bitrate_logger_.MaybeLog(encoder_stack_->GetTargetBitrate() / 1000);
  if (encode_buffer_.size() == 0 && !encoded_info.send_even_if_empty) {
    // Not enough data.
    return 0;
  }
  previous_pltype = previous_pltype_;  // Read it while we have the critsect.

  // Log codec type to histogram once every 500 packets.
  if (encoded_info.encoded_bytes == 0) {
    ++number_of_consecutive_empty_packets_;
  } else {
    size_t codec_type = static_cast<size_t>(encoded_info.encoder_type);
    codec_histogram_bins_log_[codec_type] +=
        number_of_consecutive_empty_packets_ + 1;
    number_of_consecutive_empty_packets_ = 0;
    if (codec_histogram_bins_log_[codec_type] >= 500) {
      codec_histogram_bins_log_[codec_type] -= 500;
      UpdateCodecTypeHistogram(codec_type);
    }
  }

  RTPFragmentationHeader my_fragmentation;
  ConvertEncodedInfoToFragmentationHeader(encoded_info, &my_fragmentation);
  FrameType frame_type;
  if (encode_buffer_.size() == 0 && encoded_info.send_even_if_empty) {
    frame_type = kEmptyFrame;
    encoded_info.payload_type = previous_pltype;
  } else {
    RTC_DCHECK_GT(encode_buffer_.size(), 0u);
    frame_type = encoded_info.speech ? kAudioFrameSpeech : kAudioFrameCN;
  }

  {
    rtc::CritScope lock(&callback_crit_sect_);
    if (packetization_callback_) {
      packetization_callback_->SendData(
          frame_type, encoded_info.payload_type,
          encoded_info.encoded_timestamp, encode_buffer_.data(),
          encode_buffer_.size(),
          my_fragmentation.fragmentationVectorSize > 0 ? &my_fragmentation
                                                       : nullptr);
    }

    if (vad_callback_) {
      // Callback with VAD decision.
      vad_callback_->InFrameType(frame_type);
    }
  }
  previous_pltype_ = encoded_info.payload_type;
  return static_cast<int32_t>(encode_buffer_.size());
}

/////////////////////////////////////////
//   Sender
//

// Can be called multiple times for Codec, CNG, RED.
int AudioCodingModuleImpl::RegisterSendCodec(const CodecInst& send_codec) {
  rtc::CritScope lock(&acm_crit_sect_);
  if (!encoder_factory_->codec_manager.RegisterEncoder(send_codec)) {
    return -1;
  }
  if (encoder_factory_->codec_manager.GetCodecInst()) {
    encoder_factory_->external_speech_encoder = nullptr;
  }
  if (!CreateSpeechEncoderIfNecessary(encoder_factory_.get())) {
    return -1;
  }
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  if (sp->speech_encoder)
    encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
  return 0;
}

void AudioCodingModuleImpl::RegisterExternalSendCodec(
    AudioEncoder* external_speech_encoder) {
  rtc::CritScope lock(&acm_crit_sect_);
  encoder_factory_->codec_manager.UnsetCodecInst();
  encoder_factory_->external_speech_encoder = external_speech_encoder;
  RTC_CHECK(CreateSpeechEncoderIfNecessary(encoder_factory_.get()));
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  RTC_CHECK(sp->speech_encoder);
  encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
}

void AudioCodingModuleImpl::ModifyEncoder(
    FunctionView<void(std::unique_ptr<AudioEncoder>*)> modifier) {
  rtc::CritScope lock(&acm_crit_sect_);

  // Wipe the encoder factory, so that everything that relies on it will fail.
  // We don't want the complexity of supporting swapping back and forth.
  if (encoder_factory_) {
    encoder_factory_.reset();
    RTC_CHECK(!encoder_stack_);  // Ensure we hadn't started using the factory.
  }

  modifier(&encoder_stack_);
}

// Get current send codec.
rtc::Optional<CodecInst> AudioCodingModuleImpl::SendCodec() const {
  rtc::CritScope lock(&acm_crit_sect_);
  if (encoder_factory_) {
    auto* ci = encoder_factory_->codec_manager.GetCodecInst();
    if (ci) {
      return rtc::Optional<CodecInst>(*ci);
    }
    CreateSpeechEncoderIfNecessary(encoder_factory_.get());
    const std::unique_ptr<AudioEncoder>& enc =
        encoder_factory_->codec_manager.GetStackParams()->speech_encoder;
    if (enc) {
      return rtc::Optional<CodecInst>(CodecManager::ForgeCodecInst(enc.get()));
    }
    return rtc::Optional<CodecInst>();
  } else {
    return encoder_stack_
               ? rtc::Optional<CodecInst>(
                     CodecManager::ForgeCodecInst(encoder_stack_.get()))
               : rtc::Optional<CodecInst>();
  }
}

// Get current send frequency.
int AudioCodingModuleImpl::SendFrequency() const {
  WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
               "SendFrequency()");
  rtc::CritScope lock(&acm_crit_sect_);

  if (!encoder_stack_) {
    WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
                 "SendFrequency Failed, no codec is registered");
    return -1;
  }

  return encoder_stack_->SampleRateHz();
}

void AudioCodingModuleImpl::SetBitRate(int bitrate_bps) {
  rtc::CritScope lock(&acm_crit_sect_);
  if (encoder_stack_) {
    encoder_stack_->SetTargetBitrate(bitrate_bps);
  }
}

// Register a transport callback which will be called to deliver
// the encoded buffers.
int AudioCodingModuleImpl::RegisterTransportCallback(
    AudioPacketizationCallback* transport) {
  rtc::CritScope lock(&callback_crit_sect_);
  packetization_callback_ = transport;
  return 0;
}

// Add 10 ms of raw (PCM) audio data to the encoder.
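// The ACM lock is acquired here and held across both Add10MsDataInternal()
// and Encode(), which both assume that |acm_crit_sect_| is already held.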
int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
  InputData input_data;
  rtc::CritScope lock(&acm_crit_sect_);
  int r = Add10MsDataInternal(audio_frame, &input_data);
  return r < 0 ? r : Encode(input_data);
}

int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
                                               InputData* input_data) {
  if (audio_frame.samples_per_channel_ == 0) {
    assert(false);
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, payload length is zero");
    return -1;
  }

  if (audio_frame.sample_rate_hz_ > 48000) {
    assert(false);
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, input frequency not valid");
    return -1;
  }

  // Check that the length and sample rate match. We currently support only
  // raw PCM.
  if (static_cast<size_t>(audio_frame.sample_rate_hz_ / 100) !=
      audio_frame.samples_per_channel_) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, input frequency and length doesn't"
                 " match");
    return -1;
  }

  if (audio_frame.num_channels_ != 1 && audio_frame.num_channels_ != 2) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Cannot Add 10 ms audio, invalid number of channels.");
    return -1;
  }

  // Do we have a codec registered?
  if (!HaveValidEncoder("Add10MsData")) {
    return -1;
  }

  const AudioFrame* ptr_frame;
  // Perform resampling, and also down-mixing if required and if it can be
  // done before resampling (a down-mix prior to resampling will take place
  // when both the primary and secondary encoders are mono but the input is
  // stereo).
  if (PreprocessToAddData(audio_frame, &ptr_frame) < 0) {
    return -1;
  }

  // Check whether we need an up-mix or down-mix.
  const size_t current_num_channels = encoder_stack_->NumChannels();
  const bool same_num_channels =
      ptr_frame->num_channels_ == current_num_channels;

  if (!same_num_channels) {
    if (ptr_frame->num_channels_ == 1) {
      if (UpMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
        return -1;
    } else {
      if (DownMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
        return -1;
    }
  }

  // When adding data to encoders, this pointer points to an audio buffer with
  // the correct number of channels.
  const int16_t* ptr_audio = ptr_frame->data_;

  // For pushing data to the primary encoder, point |ptr_audio| to the correct
  // buffer.
  if (!same_num_channels)
    ptr_audio = input_data->buffer;

  input_data->input_timestamp = ptr_frame->timestamp_;
  input_data->audio = ptr_audio;
  input_data->length_per_channel = ptr_frame->samples_per_channel_;
  input_data->audio_channel = current_num_channels;

  return 0;
}

// Perform resampling, and down-mixing if required. We down-mix only if the
// encoder is mono and the input is stereo. In the case of dual-streaming,
// both encoders have to be mono for the down-mix to take place.
// |*ptr_out| will point to the pre-processed audio frame. If no pre-processing
// is required, |*ptr_out| points to |in_frame|.
int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
                                               const AudioFrame** ptr_out) {
  const bool resample =
      in_frame.sample_rate_hz_ != encoder_stack_->SampleRateHz();

  // This variable is true if the primary codec and the secondary codec (if it
  // exists) are both mono and the input is stereo.
  // TODO(henrik.lundin): This condition should probably be
  //   in_frame.num_channels_ > encoder_stack_->NumChannels()
  const bool down_mix =
      in_frame.num_channels_ == 2 && encoder_stack_->NumChannels() == 1;

  if (!first_10ms_data_) {
    expected_in_ts_ = in_frame.timestamp_;
    expected_codec_ts_ = in_frame.timestamp_;
    first_10ms_data_ = true;
  } else if (in_frame.timestamp_ != expected_in_ts_) {
    // TODO(turajs): Do we need a warning here?
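    // The jump in the input timestamp is rescaled from the input rate to the
    // codec rate, e.g. a jump of 320 samples at a 32 kHz input maps to 160
    // samples at a 16 kHz codec rate.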
    expected_codec_ts_ +=
        (in_frame.timestamp_ - expected_in_ts_) *
        static_cast<uint32_t>(
            static_cast<double>(encoder_stack_->SampleRateHz()) /
            static_cast<double>(in_frame.sample_rate_hz_));
    expected_in_ts_ = in_frame.timestamp_;
  }

  if (!down_mix && !resample) {
    // No pre-processing is required.
    expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
    expected_codec_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);
    *ptr_out = &in_frame;
    return 0;
  }

  *ptr_out = &preprocess_frame_;
  preprocess_frame_.num_channels_ = in_frame.num_channels_;
  int16_t audio[WEBRTC_10MS_PCM_AUDIO];
  const int16_t* src_ptr_audio = in_frame.data_;
  int16_t* dest_ptr_audio = preprocess_frame_.data_;
  if (down_mix) {
    // If resampling is required, the output of the down-mix is written into a
    // local buffer; otherwise, it is written to the output frame.
    if (resample)
      dest_ptr_audio = audio;
    if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
      return -1;
    preprocess_frame_.num_channels_ = 1;
    // Set the input of the resampler to the down-mixed signal.
    src_ptr_audio = audio;
  }

  preprocess_frame_.timestamp_ = expected_codec_ts_;
  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
  preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
  // Do the resampling if required.
  if (resample) {
    // The result of the resampler is written to the output frame.
    dest_ptr_audio = preprocess_frame_.data_;

    int samples_per_channel = resampler_.Resample10Msec(
        src_ptr_audio, in_frame.sample_rate_hz_,
        encoder_stack_->SampleRateHz(), preprocess_frame_.num_channels_,
        AudioFrame::kMaxDataSizeSamples, dest_ptr_audio);

    if (samples_per_channel < 0) {
      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                   "Cannot add 10 ms audio, resampling failed");
      return -1;
    }
    preprocess_frame_.samples_per_channel_ =
        static_cast<size_t>(samples_per_channel);
    preprocess_frame_.sample_rate_hz_ = encoder_stack_->SampleRateHz();
  }

  expected_codec_ts_ +=
      static_cast<uint32_t>(preprocess_frame_.samples_per_channel_);
  expected_in_ts_ += static_cast<uint32_t>(in_frame.samples_per_channel_);

  return 0;
}

/////////////////////////////////////////
//   (RED) Redundant Coding
//

bool AudioCodingModuleImpl::REDStatus() const {
  rtc::CritScope lock(&acm_crit_sect_);
  return encoder_factory_->codec_manager.GetStackParams()->use_red;
}

// Configure RED status, i.e., on/off.
int AudioCodingModuleImpl::SetREDStatus(bool enable_red) {
#ifdef WEBRTC_CODEC_RED
  rtc::CritScope lock(&acm_crit_sect_);
  CreateSpeechEncoderIfNecessary(encoder_factory_.get());
  if (!encoder_factory_->codec_manager.SetCopyRed(enable_red)) {
    return -1;
  }
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  if (sp->speech_encoder)
    encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
  return 0;
#else
  WEBRTC_TRACE(webrtc::kTraceWarning, webrtc::kTraceAudioCoding, id_,
               " WEBRTC_CODEC_RED is undefined");
  return -1;
#endif
}

/////////////////////////////////////////
//   (FEC) Forward Error Correction (codec internal)
//

bool AudioCodingModuleImpl::CodecFEC() const {
  rtc::CritScope lock(&acm_crit_sect_);
  return encoder_factory_->codec_manager.GetStackParams()->use_codec_fec;
}

int AudioCodingModuleImpl::SetCodecFEC(bool enable_codec_fec) {
  rtc::CritScope lock(&acm_crit_sect_);
  CreateSpeechEncoderIfNecessary(encoder_factory_.get());
  if (!encoder_factory_->codec_manager.SetCodecFEC(enable_codec_fec)) {
    return -1;
  }
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  if (sp->speech_encoder)
    encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
  if (enable_codec_fec) {
    return sp->use_codec_fec ? 0 : -1;
  } else {
    RTC_DCHECK(!sp->use_codec_fec);
    return 0;
  }
}

int AudioCodingModuleImpl::SetPacketLossRate(int loss_rate) {
  rtc::CritScope lock(&acm_crit_sect_);
  if (HaveValidEncoder("SetPacketLossRate")) {
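    // |loss_rate| is given in percent; the encoder expects a fraction in
    // [0, 1].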
    encoder_stack_->SetProjectedPacketLossRate(loss_rate / 100.0);
  }
  return 0;
}

/////////////////////////////////////////
//   (VAD) Voice Activity Detection
//
int AudioCodingModuleImpl::SetVAD(bool enable_dtx,
                                  bool enable_vad,
                                  ACMVADMode mode) {
  // Note: |enable_vad| is not used; VAD is enabled based on the DTX setting.
  RTC_DCHECK_EQ(enable_dtx, enable_vad);
  rtc::CritScope lock(&acm_crit_sect_);
  CreateSpeechEncoderIfNecessary(encoder_factory_.get());
  if (!encoder_factory_->codec_manager.SetVAD(enable_dtx, mode)) {
    return -1;
  }
  auto* sp = encoder_factory_->codec_manager.GetStackParams();
  if (sp->speech_encoder)
    encoder_stack_ = encoder_factory_->rent_a_codec.RentEncoderStack(sp);
  return 0;
}

// Get VAD/DTX settings.
int AudioCodingModuleImpl::VAD(bool* dtx_enabled, bool* vad_enabled,
                               ACMVADMode* mode) const {
  rtc::CritScope lock(&acm_crit_sect_);
  const auto* sp = encoder_factory_->codec_manager.GetStackParams();
  *dtx_enabled = *vad_enabled = sp->use_cng;
  *mode = sp->vad_mode;
  return 0;
}

/////////////////////////////////////////
//   Receiver
//

int AudioCodingModuleImpl::InitializeReceiver() {
  rtc::CritScope lock(&acm_crit_sect_);
  return InitializeReceiverSafe();
}

// Initialize the receiver; resets the codec database etc.
int AudioCodingModuleImpl::InitializeReceiverSafe() {
  // If the receiver is already initialized then we want to destroy any
  // existing decoders. After a call to this function, we should have a clean
  // start-up.
  if (receiver_initialized_) {
    if (receiver_.RemoveAllCodecs() < 0)
      return -1;
  }
  receiver_.ResetInitialDelay();
  receiver_.SetMinimumDelay(0);
  receiver_.SetMaximumDelay(0);
  receiver_.FlushBuffers();

  // Register RED and CN.
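  // RED and CN are registered up front, for every sample rate in the codec
  // database, so that the receiver can always handle redundancy and comfort
  // noise packets; speech decoders are registered later, as needed.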
  auto db = RentACodec::Database();
  for (size_t i = 0; i < db.size(); i++) {
    if (IsCodecRED(db[i]) || IsCodecCN(db[i])) {
      if (receiver_.AddCodec(static_cast<int>(i),
                             static_cast<uint8_t>(db[i].pltype), 1,
                             db[i].plfreq, nullptr, db[i].plname) < 0) {
        WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                     "Cannot register master codec.");
        return -1;
      }
    }
  }
  receiver_initialized_ = true;
  return 0;
}

// Get current receive frequency.
int AudioCodingModuleImpl::ReceiveFrequency() const {
  const auto last_packet_sample_rate = receiver_.last_packet_sample_rate_hz();
  return last_packet_sample_rate ? *last_packet_sample_rate
                                 : receiver_.last_output_sample_rate_hz();
}

// Get current playout frequency.
int AudioCodingModuleImpl::PlayoutFrequency() const {
  WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceAudioCoding, id_,
               "PlayoutFrequency()");
  return receiver_.last_output_sample_rate_hz();
}

int AudioCodingModuleImpl::RegisterReceiveCodec(const CodecInst& codec) {
  rtc::CritScope lock(&acm_crit_sect_);
  auto* ef = encoder_factory_.get();
  return RegisterReceiveCodecUnlocked(
      codec, [&] { return ef->rent_a_codec.RentIsacDecoder(codec.plfreq); });
}

int AudioCodingModuleImpl::RegisterReceiveCodec(
    const CodecInst& codec,
    FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory) {
  rtc::CritScope lock(&acm_crit_sect_);
  return RegisterReceiveCodecUnlocked(codec, isac_factory);
}

int AudioCodingModuleImpl::RegisterReceiveCodecUnlocked(
    const CodecInst& codec,
    FunctionView<std::unique_ptr<AudioDecoder>()> isac_factory) {
  RTC_DCHECK(receiver_initialized_);
  if (codec.channels > 2) {
    LOG_F(LS_ERROR) << "Unsupported number of channels: " << codec.channels;
    return -1;
  }

  auto codec_id =
      RentACodec::CodecIdByParams(codec.plname, codec.plfreq, codec.channels);
  if (!codec_id) {
    LOG_F(LS_ERROR) << "Wrong codec params to be registered as receive codec";
    return -1;
  }
  auto codec_index = RentACodec::CodecIndexFromId(*codec_id);
  RTC_CHECK(codec_index) << "Invalid codec ID: " << static_cast<int>(*codec_id);

  // Check if the payload-type is valid.
  if (!RentACodec::IsPayloadTypeValid(codec.pltype)) {
    LOG_F(LS_ERROR) << "Invalid payload type " << codec.pltype << " for "
                    << codec.plname;
    return -1;
  }

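  // iSAC is handled specially: the decoder instance rented through
  // |isac_factory| is cached (one per sample rate) and reused, since the
  // rented iSAC encoder and decoder may share codec state.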
  AudioDecoder* isac_decoder = nullptr;
  if (STR_CASE_CMP(codec.plname, "isac") == 0) {
    std::unique_ptr<AudioDecoder>& saved_isac_decoder =
        codec.plfreq == 16000 ? isac_decoder_16k_ : isac_decoder_32k_;
    if (!saved_isac_decoder) {
      saved_isac_decoder = isac_factory();
    }
    isac_decoder = saved_isac_decoder.get();
  }
  return receiver_.AddCodec(*codec_index, codec.pltype, codec.channels,
                            codec.plfreq, isac_decoder, codec.plname);
}

int AudioCodingModuleImpl::RegisterExternalReceiveCodec(
    int rtp_payload_type,
    AudioDecoder* external_decoder,
    int sample_rate_hz,
    int num_channels,
    const std::string& name) {
  rtc::CritScope lock(&acm_crit_sect_);
  RTC_DCHECK(receiver_initialized_);
  if (num_channels > 2 || num_channels < 0) {
    LOG_F(LS_ERROR) << "Unsupported number of channels: " << num_channels;
    return -1;
  }

  // Check if the payload-type is valid.
  if (!RentACodec::IsPayloadTypeValid(rtp_payload_type)) {
    LOG_F(LS_ERROR) << "Invalid payload-type " << rtp_payload_type
                    << " for external decoder.";
    return -1;
  }

  return receiver_.AddCodec(-1 /* external */, rtp_payload_type, num_channels,
                            sample_rate_hz, external_decoder, name);
}

// Get current received codec.
int AudioCodingModuleImpl::ReceiveCodec(CodecInst* current_codec) const {
  rtc::CritScope lock(&acm_crit_sect_);
  return receiver_.LastAudioCodec(current_codec);
}

// Incoming packet from the network, parsed and ready for decoding.
int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
                                          const size_t payload_length,
                                          const WebRtcRTPHeader& rtp_header) {
  return receiver_.InsertPacket(
      rtp_header,
      rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
}

// Minimum playout delay (used for lip-sync).
int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) {
  if ((time_ms < 0) || (time_ms > 10000)) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Delay must be in the range of 0-10000 milliseconds.");
    return -1;
  }
  return receiver_.SetMinimumDelay(time_ms);
}

int AudioCodingModuleImpl::SetMaximumPlayoutDelay(int time_ms) {
  if ((time_ms < 0) || (time_ms > 10000)) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Delay must be in the range of 0-10000 milliseconds.");
    return -1;
  }
  return receiver_.SetMaximumDelay(time_ms);
}

// Get 10 milliseconds of raw audio data to play out.
// Automatically resamples to the requested frequency.
int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
                                           AudioFrame* audio_frame,
                                           bool* muted) {
  // GetAudio always returns 10 ms, at the requested sample rate.
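  // If *muted is set on return, the frame's samples were not filled in and
  // the caller should treat the frame as silence.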
  if (receiver_.GetAudio(desired_freq_hz, audio_frame, muted) != 0) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "PlayoutData failed, RecOut Failed");
    return -1;
  }
  audio_frame->id_ = id_;
  return 0;
}

int AudioCodingModuleImpl::PlayoutData10Ms(int desired_freq_hz,
                                           AudioFrame* audio_frame) {
  bool muted;
  int ret = PlayoutData10Ms(desired_freq_hz, audio_frame, &muted);
  RTC_DCHECK(!muted);
  return ret;
}

/////////////////////////////////////////
//   Statistics
//

// TODO(turajs) change the return value to void. Also change the corresponding
// NetEq function.
int AudioCodingModuleImpl::GetNetworkStatistics(NetworkStatistics* statistics) {
  receiver_.GetNetworkStatistics(statistics);
  return 0;
}

int AudioCodingModuleImpl::RegisterVADCallback(ACMVADCallback* vad_callback) {
  WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceAudioCoding, id_,
               "RegisterVADCallback()");
  rtc::CritScope lock(&callback_crit_sect_);
  vad_callback_ = vad_callback;
  return 0;
}

// TODO(kwiberg): Remove this method, and have callers call IncomingPacket
// instead. The translation logic and state belong with them, not with
// AudioCodingModuleImpl.
int AudioCodingModuleImpl::IncomingPayload(const uint8_t* incoming_payload,
                                           size_t payload_length,
                                           uint8_t payload_type,
                                           uint32_t timestamp) {
  // We are not acquiring any lock when interacting with |aux_rtp_header_|
  // since no other method uses this member variable.
  if (!aux_rtp_header_) {
    // This is the first time that we are using |aux_rtp_header_|,
    // so we have to create it.
    aux_rtp_header_.reset(new WebRtcRTPHeader);
    aux_rtp_header_->header.payloadType = payload_type;
    // Doesn't matter in this case.
    aux_rtp_header_->header.ssrc = 0;
    aux_rtp_header_->header.markerBit = false;
    // Start with an arbitrary sequence number.
    aux_rtp_header_->header.sequenceNumber = 0x1234;
    aux_rtp_header_->type.Audio.channel = 1;
  }

  aux_rtp_header_->header.timestamp = timestamp;
  IncomingPacket(incoming_payload, payload_length, *aux_rtp_header_);
  // Get ready for the next payload.
  aux_rtp_header_->header.sequenceNumber++;
  return 0;
}

int AudioCodingModuleImpl::SetOpusApplication(OpusApplicationMode application) {
  rtc::CritScope lock(&acm_crit_sect_);
  if (!HaveValidEncoder("SetOpusApplication")) {
    return -1;
  }
  AudioEncoder::Application app;
  switch (application) {
    case kVoip:
      app = AudioEncoder::Application::kSpeech;
      break;
    case kAudio:
      app = AudioEncoder::Application::kAudio;
      break;
    default:
      FATAL();
      return 0;
  }
  return encoder_stack_->SetApplication(app) ? 0 : -1;
}

// Informs Opus encoder of the maximum playback rate the receiver will render.
int AudioCodingModuleImpl::SetOpusMaxPlaybackRate(int frequency_hz) {
  rtc::CritScope lock(&acm_crit_sect_);
  if (!HaveValidEncoder("SetOpusMaxPlaybackRate")) {
    return -1;
  }
  encoder_stack_->SetMaxPlaybackRate(frequency_hz);
  return 0;
}

int AudioCodingModuleImpl::EnableOpusDtx() {
  rtc::CritScope lock(&acm_crit_sect_);
  if (!HaveValidEncoder("EnableOpusDtx")) {
    return -1;
  }
  return encoder_stack_->SetDtx(true) ? 0 : -1;
}

int AudioCodingModuleImpl::DisableOpusDtx() {
  rtc::CritScope lock(&acm_crit_sect_);
  if (!HaveValidEncoder("DisableOpusDtx")) {
    return -1;
  }
  return encoder_stack_->SetDtx(false) ? 0 : -1;
}

int32_t AudioCodingModuleImpl::PlayoutTimestamp(uint32_t* timestamp) {
  rtc::Optional<uint32_t> ts = PlayoutTimestamp();
  if (!ts)
    return -1;
  *timestamp = *ts;
  return 0;
}

rtc::Optional<uint32_t> AudioCodingModuleImpl::PlayoutTimestamp() {
  return receiver_.GetPlayoutTimestamp();
}

bool AudioCodingModuleImpl::HaveValidEncoder(const char* caller_name) const {
  if (!encoder_stack_) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "%s failed: No send codec is registered.", caller_name);
    return false;
  }
  return true;
}

int AudioCodingModuleImpl::UnregisterReceiveCodec(uint8_t payload_type) {
  return receiver_.RemoveCodec(payload_type);
}

int AudioCodingModuleImpl::EnableNack(size_t max_nack_list_size) {
  return receiver_.EnableNack(max_nack_list_size);
}

void AudioCodingModuleImpl::DisableNack() {
  receiver_.DisableNack();
}

std::vector<uint16_t> AudioCodingModuleImpl::GetNackList(
    int64_t round_trip_time_ms) const {
  return receiver_.GetNackList(round_trip_time_ms);
}

int AudioCodingModuleImpl::LeastRequiredDelayMs() const {
  return receiver_.LeastRequiredDelayMs();
}

void AudioCodingModuleImpl::GetDecodingCallStatistics(
    AudioDecodingCallStats* call_stats) const {
  receiver_.GetDecodingCallStatistics(call_stats);
}

}  // namespace acm2
}  // namespace webrtc