Index: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
index cd91fa3bdeea5c290dbe2e00fe4e2404f8fb716a..04bbd161ec699338a2a36ac9a6f912ee39207132 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -57,6 +57,12 @@ VP9Encoder* VP9Encoder::Create() {
   return new VP9EncoderImpl();
 }
+void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
+                                                      void* user_data) {
+  VP9EncoderImpl* enc = static_cast<VP9EncoderImpl*>(user_data);
+  enc->GetEncodedLayerFrame(pkt);
+}
+
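libvpx delivers SVC output through a plain C function pointer plus a user_data pointer (via VP9E_REGISTER_CX_CALLBACK, set up further down), so a static trampoline like the one above is needed to get back into a C++ member function. A minimal standalone sketch of the same pattern (PacketCounter and its members are hypothetical):

#include <vpx/vpx_encoder.h>

struct PacketCounter {
  int packets = 0;
  void OnPacket(const vpx_codec_cx_pkt* /*pkt*/) { ++packets; }
  // Matches the callback shape libvpx expects: a free function taking the
  // packet plus user_data, which carries 'this' back to the object.
  static void Trampoline(vpx_codec_cx_pkt* pkt, void* user_data) {
    static_cast<PacketCounter*>(user_data)->OnPacket(pkt);
  }
};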
 VP9EncoderImpl::VP9EncoderImpl()
     : encoded_image_(),
       encoded_complete_callback_(NULL),
@@ -67,7 +73,12 @@ VP9EncoderImpl::VP9EncoderImpl()
       rc_max_intra_target_(0),
       encoder_(NULL),
       config_(NULL),
-      raw_(NULL) {
+      raw_(NULL),
+      input_image_(NULL),
+      tl0_pic_idx_(0),
+      gof_idx_(0),
+      num_temporal_layers_(0),
+      num_spatial_layers_(0) {
   memset(&codec_, 0, sizeof(codec_));
   uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
   srand(seed);
@@ -101,6 +112,55 @@ int VP9EncoderImpl::Release() {
   return WEBRTC_VIDEO_CODEC_OK;
 }
+bool VP9EncoderImpl::SetSvcRates() {
+  float rate_ratio[VPX_MAX_LAYERS] = {0};
+  float total = 0;
+  uint8_t i = 0;
+
+  for (i = 0; i < num_spatial_layers_; ++i) {
+    if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
+        svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
+      return false;
+    }
+    rate_ratio[i] = static_cast<float>(
+        svc_internal_.svc_params.scaling_factor_num[i]) /
+        svc_internal_.svc_params.scaling_factor_den[i];
+    total += rate_ratio[i];
+  }
+
+  for (i = 0; i < num_spatial_layers_; ++i) {
+    config_->ss_target_bitrate[i] = static_cast<unsigned int>(
+        config_->rc_target_bitrate * rate_ratio[i] / total);
+    if (num_temporal_layers_ == 1) {
+      config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
+    } else if (num_temporal_layers_ == 2) {
+      config_->layer_target_bitrate[i * num_temporal_layers_] =
+          config_->ss_target_bitrate[i] * 2 / 3;
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+          config_->ss_target_bitrate[i];
+    } else if (num_temporal_layers_ == 3) {
+      config_->layer_target_bitrate[i * num_temporal_layers_] =
+          config_->ss_target_bitrate[i] / 2;
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
+          config_->layer_target_bitrate[i * num_temporal_layers_] +
+          (config_->ss_target_bitrate[i] / 4);
+      config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
+          config_->ss_target_bitrate[i];
+    } else {
+      return false;
+    }
+  }
+
+  // For now, temporal layers are only supported with one spatial layer.
+  if (num_spatial_layers_ == 1) {
+    for (i = 0; i < num_temporal_layers_; ++i) {
+      config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
+    }
+  }
+
+  return true;
+}
+
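The layer_target_bitrate entries written above are cumulative per temporal layer: each entry covers that layer plus everything below it, so the last entry always equals the spatial layer's full rate. A standalone sketch of the split (SplitTemporalRates is a hypothetical helper, not part of the patch):

#include <vector>

std::vector<unsigned int> SplitTemporalRates(unsigned int ss_bitrate,
                                             int num_temporal_layers) {
  switch (num_temporal_layers) {
    case 1:
      return {ss_bitrate};  // The single layer gets everything.
    case 2:
      return {ss_bitrate * 2 / 3, ss_bitrate};  // Base layer gets 2/3.
    case 3:
      // Base gets 1/2, base+middle gets 3/4, all three get the full rate.
      return {ss_bitrate / 2, ss_bitrate / 2 + ss_bitrate / 4, ss_bitrate};
    default:
      return {};  // Mirrors the 'return false' path above.
  }
}

For example, SplitTemporalRates(1200, 3) yields {600, 900, 1200} kbps.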
 int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
                              uint32_t new_framerate) {
   if (!inited_) {
@@ -118,6 +178,11 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
   }
   config_->rc_target_bitrate = new_bitrate_kbit;
   codec_.maxFramerate = new_framerate;
+
+  if (!SetSvcRates()) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
   // Update encoder context
   if (vpx_codec_enc_config_set(encoder_, config_)) {
     return WEBRTC_VIDEO_CODEC_ERROR;
@@ -144,6 +209,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   if (number_of_cores < 1) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
   }
+  if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+  // For now, only support one spatial layer.
+  if (inst->codecSpecific.VP9.numberOfSpatialLayers != 1) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
   int retVal = Release();
   if (retVal < 0) {
     return retVal;
@@ -158,6 +230,12 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   if (&codec_ != inst) {
     codec_ = *inst;
   }
+
+  num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
+  num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
+  if (num_temporal_layers_ == 0)
+    num_temporal_layers_ = 1;
+
   // Random start 16 bits is enough.
   picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
   // Allocate memory for encoded image
@@ -209,13 +287,57 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   config_->g_threads = NumberOfThreads(config_->g_w,
                                        config_->g_h,
                                        number_of_cores);
+
   cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
+
+  // TODO(asapersson): Check configuration of temporal switch up and increase
+  // pattern length.
+  if (num_temporal_layers_ == 1) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode1);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
+    config_->ts_number_layers = 1;
+    config_->ts_rate_decimator[0] = 1;
+    config_->ts_periodicity = 1;
+    config_->ts_layer_id[0] = 0;
+  } else if (num_temporal_layers_ == 2) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode2);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
+    config_->ts_number_layers = 2;
+    config_->ts_rate_decimator[0] = 2;
+    config_->ts_rate_decimator[1] = 1;
+    config_->ts_periodicity = 2;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 1;
+  } else if (num_temporal_layers_ == 3) {
+    gof_.SetGofInfoVP9(kTemporalStructureMode3);
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
+    config_->ts_number_layers = 3;
+    config_->ts_rate_decimator[0] = 4;
+    config_->ts_rate_decimator[1] = 2;
+    config_->ts_rate_decimator[2] = 1;
+    config_->ts_periodicity = 4;
+    config_->ts_layer_id[0] = 0;
+    config_->ts_layer_id[1] = 2;
+    config_->ts_layer_id[2] = 1;
+    config_->ts_layer_id[3] = 2;
+  } else {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
+  tl0_pic_idx_ = static_cast<uint8_t>(rand());
+
   return InitAndSetControlSettings(inst);
 }
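ts_periodicity and ts_layer_id describe a repeating schedule: frame n is assigned to temporal layer ts_layer_id[n % ts_periodicity]. A small sketch of how the three-layer 0-2-1-2 pattern configured above plays out (standalone demo, not part of the patch):

#include <cstdio>

int main() {
  const int ts_periodicity = 4;
  const int ts_layer_id[4] = {0, 2, 1, 2};  // VP9E_TEMPORAL_LAYERING_MODE_0212
  for (int frame = 0; frame < 8; ++frame) {
    // Prints 0 2 1 2 0 2 1 2: decoding TL0 alone gives 1/4 of the frame
    // rate, TL0+TL1 gives 1/2, and all layers give the full rate, matching
    // the ts_rate_decimator values 4, 2, 1 above.
    printf("frame %d -> TL%d\n", frame, ts_layer_id[frame % ts_periodicity]);
  }
  return 0;
}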
 int VP9EncoderImpl::NumberOfThreads(int width,
                                     int height,
                                     int number_of_cores) {
+  // For the current libvpx library, only one thread is supported when SVC is
+  // turned on.
+  if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+    return 1;
+  }
+
   // Keep the number of encoder threads equal to the possible number of column
   // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
   if (width * height >= 1280 * 720 && number_of_cores > 4) {
@@ -229,6 +351,27 @@ int VP9EncoderImpl::NumberOfThreads(int width,
 }
 int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
+
+  config_->ss_number_layers = num_spatial_layers_;
+
+  if (num_spatial_layers_ > 1) {
+    config_->rc_min_quantizer = 0;
+    config_->rc_max_quantizer = 63;
+  }
+  int scaling_factor_num = 256;
+  for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
+    svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
+    svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
+    // 1:2 scaling in each dimension.
+    svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
+    svc_internal_.svc_params.scaling_factor_den[i] = 256;
+    scaling_factor_num /= 2;
+  }
+
+  if (!SetSvcRates()) {
+    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
+  }
+
   if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
   }
@@ -237,6 +380,19 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
                     rc_max_intra_target_);
   vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
                     inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);
+
+  vpx_codec_control(
+      encoder_, VP9E_SET_SVC,
+      (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
+  if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
+    vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
+                      &svc_internal_.svc_params);
+  }
+  // Register a callback for getting each encoded spatial layer.
+  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
+      VP9EncoderImpl::EncoderOutputCodedPacketCallback, this};
+  vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, &cbp);
+
   // Control function to set the number of column tiles in encoding a frame, in
   // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
   // The number tile columns will be capped by the encoder based on image size
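VP9E_SET_TILE_COLUMNS takes the column count in log2 units, which is why NumberOfThreads() restricts itself to powers of two (1, 2, 4, 8). A hypothetical helper showing the mapping the comment describes (the actual control call sits just below the visible context):

int TileColumnsLog2(int threads) {
  int log2 = 0;
  while ((1 << (log2 + 1)) <= threads) {
    ++log2;  // 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.
  }
  return log2;
}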
@@ -286,6 +442,13 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
   }
   DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
   DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
+
+  // Keep a pointer to the input image: the encoded-packet callback needs
+  // information from it. Strictly, only a few fields (such as the timestamp
+  // and render time) are used, so copying just those values would work
+  // as well.
+  input_image_ = &input_image;
+
   // Image in vpx_image_t format.
   // Input image is const. VPX's raw image is not defined as const.
   raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
@@ -308,7 +471,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
     return WEBRTC_VIDEO_CODEC_ERROR;
   }
   timestamp_ += duration;
-  return GetEncodedPartitions(input_image);
+
+  return WEBRTC_VIDEO_CODEC_OK;
 }
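As the comment in Encode() notes, the callback only reads a couple of fields from the stored frame. A sketch of the leaner alternative it suggests, assuming a hypothetical input_frame_info_ member in place of input_image_:

#include <cstdint>

// Hypothetical replacement for the raw input_image_ pointer: copy just the
// fields GetEncodedLayerFrame() needs, sidestepping any question about the
// frame's lifetime while the callback runs.
struct InputFrameInfo {
  uint32_t timestamp;      // from input_image.timestamp()
  int64_t render_time_ms;  // from input_image.render_time_ms()
};

// In Encode(), before vpx_codec_encode():
//   input_frame_info_ = {input_image.timestamp(), input_image.render_time_ms()};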
 void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
@@ -317,20 +481,83 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
   assert(codec_specific != NULL);
   codec_specific->codecType = kVideoCodecVP9;
   CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);
-  vp9_info->pictureId = picture_id_;
-  vp9_info->keyIdx = kNoKeyIdx;
-  vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
-  // TODO(marpan): Temporal layers are supported in the current VP9 version,
-  // but for now use 1 temporal layer encoding. Will update this when temporal
-  // layer support for VP9 is added in webrtc.
-  vp9_info->temporalIdx = kNoTemporalIdx;
-  vp9_info->layerSync = false;
-  vp9_info->tl0PicIdx = kNoTl0PicIdx;
-  picture_id_ = (picture_id_ + 1) & 0x7FFF;
+  // TODO(asapersson): Set correct values.
+  vp9_info->inter_pic_predicted =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) == 0;
+  vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
+  vp9_info->ss_data_available =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
+    gof_idx_ = 0;
+  }
+
+  vpx_svc_layer_id_t layer_id = {0};
+  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+
+  assert(num_temporal_layers_ > 0);
+  assert(num_spatial_layers_ > 0);
+  if (num_temporal_layers_ == 1) {
+    assert(layer_id.temporal_layer_id == 0);
+    vp9_info->temporal_idx = kNoTemporalIdx;
+  } else {
+    vp9_info->temporal_idx = layer_id.temporal_layer_id;
+  }
+  if (num_spatial_layers_ == 1) {
+    assert(layer_id.spatial_layer_id == 0);
+    vp9_info->spatial_idx = kNoSpatialIdx;
+  } else {
+    vp9_info->spatial_idx = layer_id.spatial_layer_id;
+  }
+  if (layer_id.spatial_layer_id != 0) {
+    vp9_info->ss_data_available = false;
+  }
+
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
+  }
+
+  // TODO(asapersson): this info has to be obtained from the encoder.
+  vp9_info->temporal_up_switch = true;
+
+  if (layer_id.spatial_layer_id == 0) {
+    picture_id_ = (picture_id_ + 1) & 0x7FFF;
+    // TODO(asapersson): this info has to be obtained from the encoder.
+    vp9_info->inter_layer_predicted = false;
+  } else {
+    // TODO(asapersson): this info has to be obtained from the encoder.
+    vp9_info->inter_layer_predicted = true;
+  }
+
+  vp9_info->picture_id = picture_id_;
+
+  if (!vp9_info->flexible_mode) {
+    if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
+      tl0_pic_idx_++;
+    }
+    vp9_info->tl0_pic_idx = tl0_pic_idx_;
+  }
+
+  if (vp9_info->ss_data_available) {
+    vp9_info->num_spatial_layers = num_spatial_layers_;
+    vp9_info->spatial_layer_resolution_present = true;
+    for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
+      vp9_info->width[i] = codec_.width *
+                           svc_internal_.svc_params.scaling_factor_num[i] /
+                           svc_internal_.svc_params.scaling_factor_den[i];
+      vp9_info->height[i] = codec_.height *
+                            svc_internal_.svc_params.scaling_factor_num[i] /
+                            svc_internal_.svc_params.scaling_factor_den[i];
+    }
+    if (!vp9_info->flexible_mode) {
+      vp9_info->gof.CopyGofInfoVP9(gof_);
+    }
+  }
 }
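With the 1:2 per-dimension scaling factors set in InitAndSetControlSettings(), the width[]/height[] computation above advertises each spatial layer's resolution in the SS data. A standalone sketch of the integer math for a two-layer 1280x720 stream (hypothetical demo):

#include <cstdio>

int main() {
  const int width = 1280, height = 720;
  const int num[2] = {128, 256};  // scaling_factor_num, as set above.
  const int den[2] = {256, 256};  // scaling_factor_den.
  for (int i = 0; i < 2; ++i) {
    // Prints "layer 0: 640x360" and "layer 1: 1280x720".
    printf("layer %d: %dx%d\n", i, width * num[i] / den[i],
           height * num[i] / den[i]);
  }
  return 0;
}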
-int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
-  vpx_codec_iter_t iter = NULL;
+int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   encoded_image_._length = 0;
   encoded_image_._frameType = kDeltaFrame;
   RTPFragmentationHeader frag_info;
@@ -339,44 +566,33 @@ int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
   frag_info.VerifyAndAllocateFragmentationHeader(1);
   int part_idx = 0;
   CodecSpecificInfo codec_specific;
-  const vpx_codec_cx_pkt_t *pkt = NULL;
-  while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {
-    switch (pkt->kind) {
-      case VPX_CODEC_CX_FRAME_PKT: {
-        memcpy(&encoded_image_._buffer[encoded_image_._length],
-               pkt->data.frame.buf,
-               pkt->data.frame.sz);
-        frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
-        frag_info.fragmentationLength[part_idx] =
-            static_cast<uint32_t>(pkt->data.frame.sz);
-        frag_info.fragmentationPlType[part_idx] = 0;
-        frag_info.fragmentationTimeDiff[part_idx] = 0;
-        encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
-        assert(encoded_image_._length <= encoded_image_._size);
-        break;
-      }
-      default: {
-        break;
-      }
-    }
-    // End of frame.
-    if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
-      // Check if encoded frame is a key frame.
-      if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
-        encoded_image_._frameType = kKeyFrame;
-      }
-      PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());
-      break;
-    }
+
+  assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
+  memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
+         pkt->data.frame.sz);
+  frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
+  frag_info.fragmentationLength[part_idx] =
+      static_cast<uint32_t>(pkt->data.frame.sz);
+  frag_info.fragmentationPlType[part_idx] = 0;
+  frag_info.fragmentationTimeDiff[part_idx] = 0;
+  encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
+  assert(encoded_image_._length <= encoded_image_._size);
+
+  // Each callback delivers one complete layer frame.
+  // Check if the encoded frame is a key frame.
+  if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
+    encoded_image_._frameType = kKeyFrame;
   }
+  PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+
   if (encoded_image_._length > 0) {
     TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
-    encoded_image_._timeStamp = input_image.timestamp();
-    encoded_image_.capture_time_ms_ = input_image.render_time_ms();
+    encoded_image_._timeStamp = input_image_->timestamp();
+    encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
     encoded_image_._encodedHeight = raw_->d_h;
     encoded_image_._encodedWidth = raw_->d_w;
     encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
-                                       &frag_info);
+                                        &frag_info);
   }
   return WEBRTC_VIDEO_CODEC_OK;
 }
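Unlike the VP8 path, each packet handled here carries exactly one partition, so the fragmentation header always describes a single span covering the whole layer frame. A sketch of that invariant as a helper (hypothetical function; the include path follows the WebRTC tree layout of this vintage):

#include "webrtc/modules/interface/module_common_types.h"  // RTPFragmentationHeader

void FillSingleFragment(RTPFragmentationHeader* frag, size_t encoded_size) {
  frag->VerifyAndAllocateFragmentationHeader(1);  // One partition per packet.
  frag->fragmentationOffset[0] = 0;
  frag->fragmentationLength[0] = static_cast<uint32_t>(encoded_size);
  frag->fragmentationPlType[0] = 0;    // Unused for VP9.
  frag->fragmentationTimeDiff[0] = 0;  // Unused for VP9.
}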