OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" | 11 #include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" |
12 | 12 |
13 #include <algorithm> | 13 #include <algorithm> |
14 #include <limits> | 14 #include <limits> |
15 | 15 |
16 namespace webrtc { | 16 namespace webrtc { |
17 | 17 |
18 namespace { | 18 namespace { |
19 | 19 |
20 const int kMaxFrameSizeMs = 60; | 20 const int kMaxFrameSizeMs = 60; |
21 | 21 |
22 rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> CreateCngInst( | 22 rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> CreateCngInst( |
23 int sample_rate_hz, | 23 int sample_rate_hz, |
24 int sid_frame_interval_ms, | 24 int sid_frame_interval_ms, |
25 int num_cng_coefficients) { | 25 int num_cng_coefficients) { |
26 rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst; | 26 rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst; |
27 CHECK_EQ(0, WebRtcCng_CreateEnc(cng_inst.accept())); | 27 RTC_CHECK_EQ(0, WebRtcCng_CreateEnc(cng_inst.accept())); |
28 CHECK_EQ(0, WebRtcCng_InitEnc(cng_inst.get(), sample_rate_hz, | 28 RTC_CHECK_EQ(0, |
29 sid_frame_interval_ms, num_cng_coefficients)); | 29 WebRtcCng_InitEnc(cng_inst.get(), sample_rate_hz, |
| 30 sid_frame_interval_ms, num_cng_coefficients)); |
30 return cng_inst; | 31 return cng_inst; |
31 } | 32 } |
32 | 33 |
33 } // namespace | 34 } // namespace |
34 | 35 |
35 bool AudioEncoderCng::Config::IsOk() const { | 36 bool AudioEncoderCng::Config::IsOk() const { |
36 if (num_channels != 1) | 37 if (num_channels != 1) |
37 return false; | 38 return false; |
38 if (!speech_encoder) | 39 if (!speech_encoder) |
39 return false; | 40 return false; |
40 if (num_channels != speech_encoder->NumChannels()) | 41 if (num_channels != speech_encoder->NumChannels()) |
41 return false; | 42 return false; |
42 if (sid_frame_interval_ms < | 43 if (sid_frame_interval_ms < |
43 static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10)) | 44 static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10)) |
44 return false; | 45 return false; |
45 if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER || | 46 if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER || |
46 num_cng_coefficients <= 0) | 47 num_cng_coefficients <= 0) |
47 return false; | 48 return false; |
48 return true; | 49 return true; |
49 } | 50 } |
50 | 51 |
51 AudioEncoderCng::AudioEncoderCng(const Config& config) | 52 AudioEncoderCng::AudioEncoderCng(const Config& config) |
52 : speech_encoder_(config.speech_encoder), | 53 : speech_encoder_(config.speech_encoder), |
53 cng_payload_type_(config.payload_type), | 54 cng_payload_type_(config.payload_type), |
54 num_cng_coefficients_(config.num_cng_coefficients), | 55 num_cng_coefficients_(config.num_cng_coefficients), |
55 sid_frame_interval_ms_(config.sid_frame_interval_ms), | 56 sid_frame_interval_ms_(config.sid_frame_interval_ms), |
56 last_frame_active_(true), | 57 last_frame_active_(true), |
57 vad_(config.vad ? rtc_make_scoped_ptr(config.vad) | 58 vad_(config.vad ? rtc_make_scoped_ptr(config.vad) |
58 : CreateVad(config.vad_mode)) { | 59 : CreateVad(config.vad_mode)) { |
59 CHECK(config.IsOk()) << "Invalid configuration."; | 60 RTC_CHECK(config.IsOk()) << "Invalid configuration."; |
60 cng_inst_ = CreateCngInst(SampleRateHz(), sid_frame_interval_ms_, | 61 cng_inst_ = CreateCngInst(SampleRateHz(), sid_frame_interval_ms_, |
61 num_cng_coefficients_); | 62 num_cng_coefficients_); |
62 } | 63 } |
63 | 64 |
64 AudioEncoderCng::~AudioEncoderCng() = default; | 65 AudioEncoderCng::~AudioEncoderCng() = default; |
65 | 66 |
66 size_t AudioEncoderCng::MaxEncodedBytes() const { | 67 size_t AudioEncoderCng::MaxEncodedBytes() const { |
67 const size_t max_encoded_bytes_active = speech_encoder_->MaxEncodedBytes(); | 68 const size_t max_encoded_bytes_active = speech_encoder_->MaxEncodedBytes(); |
68 const size_t max_encoded_bytes_passive = | 69 const size_t max_encoded_bytes_passive = |
69 rtc::CheckedDivExact(kMaxFrameSizeMs, 10) * SamplesPer10msFrame(); | 70 rtc::CheckedDivExact(kMaxFrameSizeMs, 10) * SamplesPer10msFrame(); |
(...skipping 22 matching lines...) |
92 | 93 |
93 int AudioEncoderCng::GetTargetBitrate() const { | 94 int AudioEncoderCng::GetTargetBitrate() const { |
94 return speech_encoder_->GetTargetBitrate(); | 95 return speech_encoder_->GetTargetBitrate(); |
95 } | 96 } |
96 | 97 |
97 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal( | 98 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal( |
98 uint32_t rtp_timestamp, | 99 uint32_t rtp_timestamp, |
99 const int16_t* audio, | 100 const int16_t* audio, |
100 size_t max_encoded_bytes, | 101 size_t max_encoded_bytes, |
101 uint8_t* encoded) { | 102 uint8_t* encoded) { |
102 CHECK_GE(max_encoded_bytes, static_cast<size_t>(num_cng_coefficients_ + 1)); | 103 RTC_CHECK_GE(max_encoded_bytes, |
| 104 static_cast<size_t>(num_cng_coefficients_ + 1)); |
103 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); | 105 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); |
104 CHECK_EQ(speech_buffer_.size(), | 106 RTC_CHECK_EQ(speech_buffer_.size(), |
105 rtp_timestamps_.size() * samples_per_10ms_frame); | 107 rtp_timestamps_.size() * samples_per_10ms_frame); |
106 rtp_timestamps_.push_back(rtp_timestamp); | 108 rtp_timestamps_.push_back(rtp_timestamp); |
107 for (size_t i = 0; i < samples_per_10ms_frame; ++i) { | 109 for (size_t i = 0; i < samples_per_10ms_frame; ++i) { |
108 speech_buffer_.push_back(audio[i]); | 110 speech_buffer_.push_back(audio[i]); |
109 } | 111 } |
110 const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket(); | 112 const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket(); |
111 if (rtp_timestamps_.size() < frames_to_encode) { | 113 if (rtp_timestamps_.size() < frames_to_encode) { |
112 return EncodedInfo(); | 114 return EncodedInfo(); |
113 } | 115 } |
114 CHECK_LE(static_cast<int>(frames_to_encode * 10), kMaxFrameSizeMs) | 116 RTC_CHECK_LE(static_cast<int>(frames_to_encode * 10), kMaxFrameSizeMs) |
115 << "Frame size cannot be larger than " << kMaxFrameSizeMs | 117 << "Frame size cannot be larger than " << kMaxFrameSizeMs |
116 << " ms when using VAD/CNG."; | 118 << " ms when using VAD/CNG."; |
117 | 119 |
118 // Group several 10 ms blocks per VAD call. Call VAD once or twice using the | 120 // Group several 10 ms blocks per VAD call. Call VAD once or twice using the |
119 // following split sizes: | 121 // following split sizes: |
120 // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms; | 122 // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms; |
121 // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms. | 123 // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms. |
122 size_t blocks_in_first_vad_call = | 124 size_t blocks_in_first_vad_call = |
123 (frames_to_encode > 3 ? 3 : frames_to_encode); | 125 (frames_to_encode > 3 ? 3 : frames_to_encode); |
124 if (frames_to_encode == 4) | 126 if (frames_to_encode == 4) |
125 blocks_in_first_vad_call = 2; | 127 blocks_in_first_vad_call = 2; |
126 CHECK_GE(frames_to_encode, blocks_in_first_vad_call); | 128 RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call); |
127 const size_t blocks_in_second_vad_call = | 129 const size_t blocks_in_second_vad_call = |
128 frames_to_encode - blocks_in_first_vad_call; | 130 frames_to_encode - blocks_in_first_vad_call; |
129 | 131 |
130 // Check if all of the buffer is passive speech. Start with checking the first | 132 // Check if all of the buffer is passive speech. Start with checking the first |
131 // block. | 133 // block. |
132 Vad::Activity activity = vad_->VoiceActivity( | 134 Vad::Activity activity = vad_->VoiceActivity( |
133 &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call, | 135 &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call, |
134 SampleRateHz()); | 136 SampleRateHz()); |
135 if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) { | 137 if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) { |
136 // Only check the second block if the first was passive. | 138 // Only check the second block if the first was passive. |
(...skipping 62 matching lines...) |
199 speech_encoder_->SetTargetBitrate(bits_per_second); | 201 speech_encoder_->SetTargetBitrate(bits_per_second); |
200 } | 202 } |
201 | 203 |
202 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive( | 204 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive( |
203 size_t frames_to_encode, | 205 size_t frames_to_encode, |
204 size_t max_encoded_bytes, | 206 size_t max_encoded_bytes, |
205 uint8_t* encoded) { | 207 uint8_t* encoded) { |
206 bool force_sid = last_frame_active_; | 208 bool force_sid = last_frame_active_; |
207 bool output_produced = false; | 209 bool output_produced = false; |
208 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); | 210 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); |
209 CHECK_GE(max_encoded_bytes, frames_to_encode * samples_per_10ms_frame); | 211 RTC_CHECK_GE(max_encoded_bytes, frames_to_encode * samples_per_10ms_frame); |
210 AudioEncoder::EncodedInfo info; | 212 AudioEncoder::EncodedInfo info; |
211 for (size_t i = 0; i < frames_to_encode; ++i) { | 213 for (size_t i = 0; i < frames_to_encode; ++i) { |
212 // It's important not to pass &info.encoded_bytes directly to | 214 // It's important not to pass &info.encoded_bytes directly to |
213 // WebRtcCng_Encode(), since later loop iterations may return zero in that | 215 // WebRtcCng_Encode(), since later loop iterations may return zero in that |
214 // value, in which case we don't want to overwrite any value from an earlier | 216 // value, in which case we don't want to overwrite any value from an earlier |
215 // iteration. | 217 // iteration. |
216 size_t encoded_bytes_tmp = 0; | 218 size_t encoded_bytes_tmp = 0; |
217 CHECK_GE(WebRtcCng_Encode(cng_inst_.get(), | 219 RTC_CHECK_GE(WebRtcCng_Encode(cng_inst_.get(), |
218 &speech_buffer_[i * samples_per_10ms_frame], | 220 &speech_buffer_[i * samples_per_10ms_frame], |
219 samples_per_10ms_frame, | 221 samples_per_10ms_frame, encoded, |
220 encoded, &encoded_bytes_tmp, force_sid), 0); | 222 &encoded_bytes_tmp, force_sid), |
| 223 0); |
221 if (encoded_bytes_tmp > 0) { | 224 if (encoded_bytes_tmp > 0) { |
222 CHECK(!output_produced); | 225 RTC_CHECK(!output_produced); |
223 info.encoded_bytes = encoded_bytes_tmp; | 226 info.encoded_bytes = encoded_bytes_tmp; |
224 output_produced = true; | 227 output_produced = true; |
225 force_sid = false; | 228 force_sid = false; |
226 } | 229 } |
227 } | 230 } |
228 info.encoded_timestamp = rtp_timestamps_.front(); | 231 info.encoded_timestamp = rtp_timestamps_.front(); |
229 info.payload_type = cng_payload_type_; | 232 info.payload_type = cng_payload_type_; |
230 info.send_even_if_empty = true; | 233 info.send_even_if_empty = true; |
231 info.speech = false; | 234 info.speech = false; |
232 return info; | 235 return info; |
233 } | 236 } |
234 | 237 |
235 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive( | 238 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive( |
236 size_t frames_to_encode, | 239 size_t frames_to_encode, |
237 size_t max_encoded_bytes, | 240 size_t max_encoded_bytes, |
238 uint8_t* encoded) { | 241 uint8_t* encoded) { |
239 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); | 242 const size_t samples_per_10ms_frame = SamplesPer10msFrame(); |
240 AudioEncoder::EncodedInfo info; | 243 AudioEncoder::EncodedInfo info; |
241 for (size_t i = 0; i < frames_to_encode; ++i) { | 244 for (size_t i = 0; i < frames_to_encode; ++i) { |
242 info = speech_encoder_->Encode( | 245 info = speech_encoder_->Encode( |
243 rtp_timestamps_.front(), &speech_buffer_[i * samples_per_10ms_frame], | 246 rtp_timestamps_.front(), &speech_buffer_[i * samples_per_10ms_frame], |
244 samples_per_10ms_frame, max_encoded_bytes, encoded); | 247 samples_per_10ms_frame, max_encoded_bytes, encoded); |
245 if (i + 1 == frames_to_encode) { | 248 if (i + 1 == frames_to_encode) { |
246 CHECK_GT(info.encoded_bytes, 0u) << "Encoder didn't deliver data."; | 249 RTC_CHECK_GT(info.encoded_bytes, 0u) << "Encoder didn't deliver data."; |
247 } else { | 250 } else { |
248 CHECK_EQ(info.encoded_bytes, 0u) << "Encoder delivered data too early."; | 251 RTC_CHECK_EQ(info.encoded_bytes, 0u) |
| 252 << "Encoder delivered data too early."; |
249 } | 253 } |
250 } | 254 } |
251 return info; | 255 return info; |
252 } | 256 } |
253 | 257 |
254 size_t AudioEncoderCng::SamplesPer10msFrame() const { | 258 size_t AudioEncoderCng::SamplesPer10msFrame() const { |
255 return rtc::CheckedDivExact(10 * SampleRateHz(), 1000); | 259 return rtc::CheckedDivExact(10 * SampleRateHz(), 1000); |
256 } | 260 } |
257 | 261 |
258 } // namespace webrtc | 262 } // namespace webrtc |
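
Note on the block-splitting rule in EncodeInternal() above: the VAD is called on at most 30 ms of audio at a time, and a 40 ms packet is split 20 + 20 ms rather than 30 + 10 ms. Below is a minimal standalone sketch of that rule, for illustration only; the helper name ComputeVadSplit is hypothetical and not part of this patch.

#include <cstddef>
#include <utility>

// Given the number of 10 ms frames in the next packet (1..6), return how
// many frames go into the first and second VAD calls, mirroring the table
// 10 = 10+0, 20 = 20+0, 30 = 30+0, 40 = 20+20, 50 = 30+20, 60 = 30+30 ms.
std::pair<size_t, size_t> ComputeVadSplit(size_t frames_to_encode) {
  size_t first = frames_to_encode > 3 ? 3 : frames_to_encode;
  if (frames_to_encode == 4)
    first = 2;  // A 40 ms packet splits evenly instead of 30 + 10.
  return {first, frames_to_encode - first};
}

For example, ComputeVadSplit(5) yields {3, 2}: a 50 ms packet is checked as 30 ms of audio followed by 20 ms.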