| Index: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
|
| diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
|
| index cd91fa3bdeea5c290dbe2e00fe4e2404f8fb716a..04bbd161ec699338a2a36ac9a6f912ee39207132 100644
|
| --- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
|
| +++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
|
| @@ -57,6 +57,12 @@ VP9Encoder* VP9Encoder::Create() {
|
| return new VP9EncoderImpl();
|
| }
|
|
|
| +void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
|
| + void* user_data) {
|
| + VP9EncoderImpl* enc = (VP9EncoderImpl*)(user_data);
|
| + enc->GetEncodedLayerFrame(pkt);
|
| +}
|
| +
|
| VP9EncoderImpl::VP9EncoderImpl()
|
| : encoded_image_(),
|
| encoded_complete_callback_(NULL),
|
| @@ -67,7 +73,12 @@ VP9EncoderImpl::VP9EncoderImpl()
|
| rc_max_intra_target_(0),
|
| encoder_(NULL),
|
| config_(NULL),
|
| - raw_(NULL) {
|
| + raw_(NULL),
|
| + input_image_(NULL),
|
| + tl0_pic_idx_(0),
|
| + gof_idx_(0),
|
| + num_temporal_layers_(0),
|
| + num_spatial_layers_(0) {
|
| memset(&codec_, 0, sizeof(codec_));
|
| uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
|
| srand(seed);
|
| @@ -101,6 +112,55 @@ int VP9EncoderImpl::Release() {
|
| return WEBRTC_VIDEO_CODEC_OK;
|
| }
|
|
|
| +bool VP9EncoderImpl::SetSvcRates() {
|
| + float rate_ratio[VPX_MAX_LAYERS] = {0};
|
| + float total = 0;
|
| + uint8_t i = 0;
|
| +
|
| + for (i = 0; i < num_spatial_layers_; ++i) {
|
| + if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||
|
| + svc_internal_.svc_params.scaling_factor_den[i] <= 0) {
|
| + return false;
|
| + }
|
| + rate_ratio[i] = static_cast<float>(
|
| + svc_internal_.svc_params.scaling_factor_num[i]) /
|
| + svc_internal_.svc_params.scaling_factor_den[i];
|
| + total += rate_ratio[i];
|
| + }
|
| +
|
| + for (i = 0; i < num_spatial_layers_; ++i) {
|
| + config_->ss_target_bitrate[i] = static_cast<unsigned int>(
|
| + config_->rc_target_bitrate * rate_ratio[i] / total);
|
| + if (num_temporal_layers_ == 1) {
|
| + config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
|
| + } else if (num_temporal_layers_ == 2) {
|
| + config_->layer_target_bitrate[i * num_temporal_layers_] =
|
| + config_->ss_target_bitrate[i] * 2 / 3;
|
| + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
|
| + config_->ss_target_bitrate[i];
|
| + } else if (num_temporal_layers_ == 3) {
|
| + config_->layer_target_bitrate[i * num_temporal_layers_] =
|
| + config_->ss_target_bitrate[i] / 2;
|
| + config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
|
| + config_->layer_target_bitrate[i * num_temporal_layers_] +
|
| + (config_->ss_target_bitrate[i] / 4);
|
| + config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
|
| + config_->ss_target_bitrate[i];
|
| + } else {
|
| + return false;
|
| + }
|
| + }
|
| +
|
| + // For now, temporal layers are only supported with one spatial layer.
|
| + if (num_spatial_layers_ == 1) {
|
| + for (i = 0; i < num_temporal_layers_; ++i) {
|
| + config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];
|
| + }
|
| + }
|
| +
|
| + return true;
|
| +}
|
| +
|
| int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
|
| uint32_t new_framerate) {
|
| if (!inited_) {
|
| @@ -118,6 +178,11 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
|
| }
|
| config_->rc_target_bitrate = new_bitrate_kbit;
|
| codec_.maxFramerate = new_framerate;
|
| +
|
| + if (!SetSvcRates()) {
|
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| + }
|
| +
|
| // Update encoder context
|
| if (vpx_codec_enc_config_set(encoder_, config_)) {
|
| return WEBRTC_VIDEO_CODEC_ERROR;
|
| @@ -144,6 +209,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
| if (number_of_cores < 1) {
|
| return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| }
|
| + if (inst->codecSpecific.VP9.numberOfTemporalLayers > 3) {
|
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| + }
|
| + // For now, only support one spatial layer.
|
| + if (inst->codecSpecific.VP9.numberOfSpatialLayers != 1) {
|
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| + }
|
| int retVal = Release();
|
| if (retVal < 0) {
|
| return retVal;
|
| @@ -158,6 +230,12 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
| if (&codec_ != inst) {
|
| codec_ = *inst;
|
| }
|
| +
|
| + num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;
|
| + num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;
|
| + if (num_temporal_layers_ == 0)
|
| + num_temporal_layers_ = 1;
|
| +
|
| // Random start 16 bits is enough.
|
| picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;
|
| // Allocate memory for encoded image
|
| @@ -209,13 +287,57 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
| config_->g_threads = NumberOfThreads(config_->g_w,
|
| config_->g_h,
|
| number_of_cores);
|
| +
|
| cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
|
| +
|
| + // TODO(asapersson): Check configuration of temporal switch up and increase
|
| + // pattern length.
|
| + if (num_temporal_layers_ == 1) {
|
| + gof_.SetGofInfoVP9(kTemporalStructureMode1);
|
| + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
|
| + config_->ts_number_layers = 1;
|
| + config_->ts_rate_decimator[0] = 1;
|
| + config_->ts_periodicity = 1;
|
| + config_->ts_layer_id[0] = 0;
|
| + } else if (num_temporal_layers_ == 2) {
|
| + gof_.SetGofInfoVP9(kTemporalStructureMode2);
|
| + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
|
| + config_->ts_number_layers = 2;
|
| + config_->ts_rate_decimator[0] = 2;
|
| + config_->ts_rate_decimator[1] = 1;
|
| + config_->ts_periodicity = 2;
|
| + config_->ts_layer_id[0] = 0;
|
| + config_->ts_layer_id[1] = 1;
|
| + } else if (num_temporal_layers_ == 3) {
|
| + gof_.SetGofInfoVP9(kTemporalStructureMode3);
|
| + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
|
| + config_->ts_number_layers = 3;
|
| + config_->ts_rate_decimator[0] = 4;
|
| + config_->ts_rate_decimator[1] = 2;
|
| + config_->ts_rate_decimator[2] = 1;
|
| + config_->ts_periodicity = 4;
|
| + config_->ts_layer_id[0] = 0;
|
| + config_->ts_layer_id[1] = 2;
|
| + config_->ts_layer_id[2] = 1;
|
| + config_->ts_layer_id[3] = 2;
|
| + } else {
|
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| + }
|
| +
|
| + tl0_pic_idx_ = static_cast<uint8_t>(rand());
|
| +
|
| return InitAndSetControlSettings(inst);
|
| }
|
|
|
| int VP9EncoderImpl::NumberOfThreads(int width,
|
| int height,
|
| int number_of_cores) {
|
| + // For the current libvpx library, only 1 thread is supported when SVC is
|
| + // turned on.
|
| + if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
|
| + return 1;
|
| + }
|
| +
|
| // Keep the number of encoder threads equal to the possible number of column
|
| // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
|
| if (width * height >= 1280 * 720 && number_of_cores > 4) {
|
| @@ -229,6 +351,27 @@ int VP9EncoderImpl::NumberOfThreads(int width,
|
| }
|
|
|
| int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
|
| +
|
| + config_->ss_number_layers = num_spatial_layers_;
|
| +
|
| + if (num_spatial_layers_ > 1) {
|
| + config_->rc_min_quantizer = 0;
|
| + config_->rc_max_quantizer = 63;
|
| + }
|
| + int scaling_factor_num = 256;
|
| + for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
|
| + svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;
|
| + svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;
|
| + // 1:2 scaling in each dimension.
|
| + svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
|
| + svc_internal_.svc_params.scaling_factor_den[i] = 256;
|
| + scaling_factor_num /= 2;
|
| + }
|
| +
|
| + if (!SetSvcRates()) {
|
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
| + }
|
| +
|
| if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {
|
| return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
|
| }
|
| @@ -237,6 +380,19 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
|
| rc_max_intra_target_);
|
| vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,
|
| inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);
|
| +
|
| + vpx_codec_control(
|
| + encoder_, VP9E_SET_SVC,
|
| + (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
|
| + if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
|
| + vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
|
| + &svc_internal_.svc_params);
|
| + }
|
| + // Register callback for getting each spatial layer.
|
| + vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
|
| + VP9EncoderImpl::EncoderOutputCodedPacketCallback, (void*)(this)};
|
| + vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, (void*)(&cbp));
|
| +
|
| // Control function to set the number of column tiles in encoding a frame, in
|
| // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
|
| // The number tile columns will be capped by the encoder based on image size
|
| @@ -286,6 +442,13 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
|
| }
|
| DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));
|
| DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));
|
| +
|
| + // Set input image for use in the callback.
|
| + // Needed because the callback reads some information from input_image.
|
| + // You can save only the necessary information (such as timestamp) instead of
|
| + // doing this.
|
| + input_image_ = &input_image;
|
| +
|
| // Image in vpx_image_t format.
|
| // Input image is const. VPX's raw image is not defined as const.
|
| raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));
|
| @@ -308,7 +471,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
|
| return WEBRTC_VIDEO_CODEC_ERROR;
|
| }
|
| timestamp_ += duration;
|
| - return GetEncodedPartitions(input_image);
|
| +
|
| + return WEBRTC_VIDEO_CODEC_OK;
|
| }
|
|
|
| void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
| @@ -317,20 +481,83 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
| assert(codec_specific != NULL);
|
| codec_specific->codecType = kVideoCodecVP9;
|
| CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);
|
| - vp9_info->pictureId = picture_id_;
|
| - vp9_info->keyIdx = kNoKeyIdx;
|
| - vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
|
| - // TODO(marpan): Temporal layers are supported in the current VP9 version,
|
| - // but for now use 1 temporal layer encoding. Will update this when temporal
|
| - // layer support for VP9 is added in webrtc.
|
| - vp9_info->temporalIdx = kNoTemporalIdx;
|
| - vp9_info->layerSync = false;
|
| - vp9_info->tl0PicIdx = kNoTl0PicIdx;
|
| - picture_id_ = (picture_id_ + 1) & 0x7FFF;
|
| + // TODO(asapersson): Set correct values.
|
| + vp9_info->inter_pic_predicted =
|
| + (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? false : true;
|
| + vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;
|
| + vp9_info->ss_data_available =
|
| + (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
|
| + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
|
| + gof_idx_ = 0;
|
| + }
|
| +
|
| + vpx_svc_layer_id_t layer_id = {0};
|
| + vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
|
| +
|
| + assert(num_temporal_layers_ > 0);
|
| + assert(num_spatial_layers_ > 0);
|
| + if (num_temporal_layers_ == 1) {
|
| + assert(layer_id.temporal_layer_id == 0);
|
| + vp9_info->temporal_idx = kNoTemporalIdx;
|
| + } else {
|
| + vp9_info->temporal_idx = layer_id.temporal_layer_id;
|
| + }
|
| + if (num_spatial_layers_ == 1) {
|
| + assert(layer_id.spatial_layer_id == 0);
|
| + vp9_info->spatial_idx = kNoSpatialIdx;
|
| + } else {
|
| + vp9_info->spatial_idx = layer_id.spatial_layer_id;
|
| + }
|
| + if (layer_id.spatial_layer_id != 0) {
|
| + vp9_info->ss_data_available = false;
|
| + }
|
| +
|
| + if (vp9_info->flexible_mode) {
|
| + vp9_info->gof_idx = kNoGofIdx;
|
| + } else {
|
| + vp9_info->gof_idx =
|
| + static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
|
| + }
|
| +
|
| + // TODO(asapersson): this info has to be obtained from the encoder.
|
| + vp9_info->temporal_up_switch = true;
|
| +
|
| + if (layer_id.spatial_layer_id == 0) {
|
| + picture_id_ = (picture_id_ + 1) & 0x7FFF;
|
| + // TODO(asapersson): this info has to be obtained from the encoder.
|
| + vp9_info->inter_layer_predicted = false;
|
| + } else {
|
| + // TODO(asapersson): this info has to be obtained from the encoder.
|
| + vp9_info->inter_layer_predicted = true;
|
| + }
|
| +
|
| + vp9_info->picture_id = picture_id_;
|
| +
|
| + if (!vp9_info->flexible_mode) {
|
| + if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {
|
| + tl0_pic_idx_++;
|
| + }
|
| + vp9_info->tl0_pic_idx = tl0_pic_idx_;
|
| + }
|
| +
|
| + if (vp9_info->ss_data_available) {
|
| + vp9_info->num_spatial_layers = num_spatial_layers_;
|
| + vp9_info->spatial_layer_resolution_present = true;
|
| + for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
|
| + vp9_info->width[i] = codec_.width *
|
| + svc_internal_.svc_params.scaling_factor_num[i] /
|
| + svc_internal_.svc_params.scaling_factor_den[i];
|
| + vp9_info->height[i] = codec_.height *
|
| + svc_internal_.svc_params.scaling_factor_num[i] /
|
| + svc_internal_.svc_params.scaling_factor_den[i];
|
| + }
|
| + if (!vp9_info->flexible_mode) {
|
| + vp9_info->gof.CopyGofInfoVP9(gof_);
|
| + }
|
| + }
|
| }
|
|
|
| -int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
|
| - vpx_codec_iter_t iter = NULL;
|
| +int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
|
| encoded_image_._length = 0;
|
| encoded_image_._frameType = kDeltaFrame;
|
| RTPFragmentationHeader frag_info;
|
| @@ -339,44 +566,33 @@ int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {
|
| frag_info.VerifyAndAllocateFragmentationHeader(1);
|
| int part_idx = 0;
|
| CodecSpecificInfo codec_specific;
|
| - const vpx_codec_cx_pkt_t *pkt = NULL;
|
| - while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {
|
| - switch (pkt->kind) {
|
| - case VPX_CODEC_CX_FRAME_PKT: {
|
| - memcpy(&encoded_image_._buffer[encoded_image_._length],
|
| - pkt->data.frame.buf,
|
| - pkt->data.frame.sz);
|
| - frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
|
| - frag_info.fragmentationLength[part_idx] =
|
| - static_cast<uint32_t>(pkt->data.frame.sz);
|
| - frag_info.fragmentationPlType[part_idx] = 0;
|
| - frag_info.fragmentationTimeDiff[part_idx] = 0;
|
| - encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
|
| - assert(encoded_image_._length <= encoded_image_._size);
|
| - break;
|
| - }
|
| - default: {
|
| - break;
|
| - }
|
| - }
|
| - // End of frame.
|
| - if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
|
| - // Check if encoded frame is a key frame.
|
| - if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
|
| - encoded_image_._frameType = kKeyFrame;
|
| - }
|
| - PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());
|
| - break;
|
| - }
|
| +
|
| + assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);
|
| + memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,
|
| + pkt->data.frame.sz);
|
| + frag_info.fragmentationOffset[part_idx] = encoded_image_._length;
|
| + frag_info.fragmentationLength[part_idx] =
|
| + static_cast<uint32_t>(pkt->data.frame.sz);
|
| + frag_info.fragmentationPlType[part_idx] = 0;
|
| + frag_info.fragmentationTimeDiff[part_idx] = 0;
|
| + encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
|
| + assert(encoded_image_._length <= encoded_image_._size);
|
| +
|
| + // End of frame.
|
| + // Check if encoded frame is a key frame.
|
| + if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
|
| + encoded_image_._frameType = kKeyFrame;
|
| }
|
| + PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
|
| +
|
| if (encoded_image_._length > 0) {
|
| TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
|
| - encoded_image_._timeStamp = input_image.timestamp();
|
| - encoded_image_.capture_time_ms_ = input_image.render_time_ms();
|
| + encoded_image_._timeStamp = input_image_->timestamp();
|
| + encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
|
| encoded_image_._encodedHeight = raw_->d_h;
|
| encoded_image_._encodedWidth = raw_->d_w;
|
| encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,
|
| - &frag_info);
|
| + &frag_info);
|
| }
|
| return WEBRTC_VIDEO_CODEC_OK;
|
| }
|
|
|