webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc - Issue 1211353002: Integration of VP9 packetization.

Unified Diff: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc

Issue 1211353002: Integration of VP9 packetization. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/video_coding/codecs/interface/video_codec_interface.h ('K') | « webrtc/modules/video_coding/codecs/vp9/vp9_impl.h ('k') | webrtc/modules/video_coding/main/source/codec_database.cc » ('j') | webrtc/modules/video_coding/main/source/encoded_frame.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc

diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc

index cd91fa3bdeea5c290dbe2e00fe4e2404f8fb716a..22d68833fe959646d7c88ccea136fac2d14a5007 100644

--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc

+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc

@@ -57,6 +57,12 @@ VP9Encoder* VP9Encoder::Create() {

return new VP9EncoderImpl();

}

+void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,

+ void* user_data) {

+ VP9EncoderImpl* enc = (VP9EncoderImpl*)(user_data);

+ enc->GetEncodedLayerFrame(pkt);

VP9EncoderImpl::VP9EncoderImpl()

: encoded_image_(),

encoded_complete_callback_(NULL),

@@ -67,7 +73,12 @@ VP9EncoderImpl::VP9EncoderImpl()

rc_max_intra_target_(0),

encoder_(NULL),

config_(NULL),

- raw_(NULL) {

+ raw_(NULL),

+ input_image_(NULL),

+ tl0_pic_idx_(0),

+ gof_idx_(0),

+ num_temporal_layers_(0),

+ num_spatial_layers_(0) {

memset(&codec_, 0, sizeof(codec_));

uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());

srand(seed);

@@ -101,6 +112,59 @@ int VP9EncoderImpl::Release() {

return WEBRTC_VIDEO_CODEC_OK;

}

+bool VP9EncoderImpl::SetSvcRates() {

+ float rate_ratio[VPX_MAX_LAYERS] = {0};

+ float total = 0;

+ uint8_t i = 0;

+ if (num_spatial_layers_ > 1) {

+ for (i = 0; i < num_spatial_layers_; ++i) {

+ if (svc_internal_.svc_params.scaling_factor_num[i] <= 0 ||

+ svc_internal_.svc_params.scaling_factor_den[i] <= 0) {

+ return false;

+ }

+ rate_ratio[i] = static_cast<float>(

+ svc_internal_.svc_params.scaling_factor_num[i]) /

+ svc_internal_.svc_params.scaling_factor_den[i];

+ total += rate_ratio[i];

+ }

+ } else {

+ rate_ratio[0] = total = 1;

+ }

+ assert(total >= 1.0f);

stefan-webrtc 2015/07/29 13:13:05 Hm, seems like there could be a risk that this doe…

åsapersson 2015/07/30 11:42:19 Done.

+ for (i = 0; i < num_spatial_layers_; ++i) {

+ config_->ss_target_bitrate[i] = static_cast<unsigned int>(

+ config_->rc_target_bitrate * rate_ratio[i] / total);

stefan-webrtc 2015/07/29 13:13:05 Should this really divide by total? total is expec…

åsapersson 2015/07/30 11:42:19 total will be >1 for num_spatial_layers_ > 1...

stefan-webrtc 2015/07/30 12:05:25 Right, my mistake.

+ if (num_temporal_layers_ == 1) {

+ config_->layer_target_bitrate[0] = config_->ss_target_bitrate[i];

+ } else if (num_temporal_layers_ == 2) {

+ config_->layer_target_bitrate[i * num_temporal_layers_] =

+ config_->ss_target_bitrate[i] * 2 / 3;

+ config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =

+ config_->ss_target_bitrate[i];

+ } else if (num_temporal_layers_ == 3) {

+ config_->layer_target_bitrate[i * num_temporal_layers_] =

+ config_->ss_target_bitrate[i] / 2;

+ config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =

+ config_->layer_target_bitrate[i * num_temporal_layers_] +

+ (config_->ss_target_bitrate[i] / 4);

+ config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =

+ config_->ss_target_bitrate[i];

+ } else {

+ return false;

+ }

+ if (num_spatial_layers_ == 1) {

stefan-webrtc 2015/07/29 13:13:05 Comment: Currently only support temporal layers if…

åsapersson 2015/07/30 11:42:19 Added a comment.

+ for (i = 0; i < num_temporal_layers_; ++i) {

+ config_->ts_target_bitrate[i] = config_->layer_target_bitrate[i];

+ }

+ return true;

int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,

uint32_t new_framerate) {

if (!inited_) {

@@ -118,6 +182,11 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,

}

config_->rc_target_bitrate = new_bitrate_kbit;

codec_.maxFramerate = new_framerate;

+ if (!SetSvcRates()) {

+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

+ }

// Update encoder context

if (vpx_codec_enc_config_set(encoder_, config_)) {

return WEBRTC_VIDEO_CODEC_ERROR;

@@ -158,6 +227,14 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,

if (&codec_ != inst) {

codec_ = *inst;

}

+ num_temporal_layers_ = inst->codecSpecific.VP9.numberOfTemporalLayers;

+ num_spatial_layers_ = inst->codecSpecific.VP9.numberOfSpatialLayers;

+ // For now, only support 1 spatial layer.

+ if (num_spatial_layers_ != 1) {

stefan-webrtc 2015/07/29 13:13:05 Does this mean one more than the base layer?

åsapersson 2015/07/30 11:42:19 Only one spatial layer.

+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

+ }

// Random start 16 bits is enough.

picture_id_ = static_cast<uint16_t>(rand()) & 0x7FFF;

// Allocate memory for encoded image

@@ -209,13 +286,56 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,

config_->g_threads = NumberOfThreads(config_->g_w,

config_->g_h,

number_of_cores);

cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);

+ // TODO(asapersson): Check configuration of temporal switch up.

+ if (num_temporal_layers_ == 1) {

+ gof_.SetGofInfoVP9(kTemporalStructureMode1);

+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;

+ config_->ts_number_layers = 1;

+ config_->ts_rate_decimator[0] = 1;

+ config_->ts_periodicity = 1;

+ config_->ts_layer_id[0] = 0;

+ } else if (num_temporal_layers_ == 2) {

+ gof_.SetGofInfoVP9(kTemporalStructureMode2);

+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;

+ config_->ts_number_layers = 2;

+ config_->ts_rate_decimator[0] = 2;

+ config_->ts_rate_decimator[1] = 1;

+ config_->ts_periodicity = 2;

+ config_->ts_layer_id[0] = 0;

+ config_->ts_layer_id[1] = 1;

+ } else if (num_temporal_layers_ == 3) {

+ gof_.SetGofInfoVP9(kTemporalStructureMode3);

+ config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;

+ config_->ts_number_layers = 3;

+ config_->ts_rate_decimator[0] = 4;

+ config_->ts_rate_decimator[1] = 2;

+ config_->ts_rate_decimator[2] = 1;

+ config_->ts_periodicity = 4;

+ config_->ts_layer_id[0] = 0;

+ config_->ts_layer_id[1] = 2;

+ config_->ts_layer_id[2] = 1;

+ config_->ts_layer_id[3] = 2;

+ } else {

+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

+ }

stefan-webrtc 2015/07/29 13:13:05 Ideally I would like this code to be shared with t…

åsapersson 2015/07/30 11:42:19 Leave for follow up cl?

stefan-webrtc 2015/07/30 12:05:25 Acknowledged.

+ tl0_pic_idx_ = static_cast<uint8_t>(rand());

return InitAndSetControlSettings(inst);

}

int VP9EncoderImpl::NumberOfThreads(int width,

int height,

int number_of_cores) {

+ // For the current libvpx library, only 1 thread is supported when SVC is

+ // turned on.

+ if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {

+ return 1;

+ }

// Keep the number of encoder threads equal to the possible number of column

// tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.

if (width * height >= 1280 * 720 && number_of_cores > 4) {

@@ -229,6 +349,26 @@ int VP9EncoderImpl::NumberOfThreads(int width,

}

int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {

+ config_->ss_number_layers = num_spatial_layers_;

+ if (num_spatial_layers_ > 1) {

+ config_->rc_min_quantizer = 0;

+ config_->rc_max_quantizer = 63;

+ }

+ int scaling_factor_num = 256;

+ for (int i = num_spatial_layers_ - 1; i >= 0; --i, scaling_factor_num /= 2) {

stefan-webrtc 2015/07/29 13:13:05 Move scaling_factor_num /= 2 to line 366 instead.

åsapersson 2015/07/30 11:42:19 Done.

+ svc_internal_.svc_params.max_quantizers[i] = config_->rc_max_quantizer;

+ svc_internal_.svc_params.min_quantizers[i] = config_->rc_min_quantizer;

+ // 1:2 scaling in each dimension.

+ svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;

+ svc_internal_.svc_params.scaling_factor_den[i] = 256;

+ }

+ if (!SetSvcRates()) {

+ return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

+ }

if (vpx_codec_enc_init(encoder_, vpx_codec_vp9_cx(), config_, 0)) {

return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

}

@@ -237,6 +377,19 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {

rc_max_intra_target_);

vpx_codec_control(encoder_, VP9E_SET_AQ_MODE,

inst->codecSpecific.VP9.adaptiveQpMode ? 3 : 0);

+ vpx_codec_control(

+ encoder_, VP9E_SET_SVC,

+ (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);

+ if (num_spatial_layers_ > 1) {

+ vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,

+ &svc_internal_.svc_params);

stefan-webrtc 2015/07/29 13:13:05 Should this not be called if num_temporal_layers_…

åsapersson 2015/07/30 11:42:19 Done.

+ }

+ // Register callback for getting each spatial layer.

+ vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {

+ VP9EncoderImpl::EncoderOutputCodedPacketCallback, (void*)(this)};

+ vpx_codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK, (void*)(&cbp));

// Control function to set the number of column tiles in encoding a frame, in

// log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.

// The number tile columns will be capped by the encoder based on image size

@@ -286,6 +439,13 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,

}

DCHECK_EQ(input_image.width(), static_cast<int>(raw_->d_w));

DCHECK_EQ(input_image.height(), static_cast<int>(raw_->d_h));

+ // Set input image for use in the callback.

+ // This was necessary since you need some information from input_image.

+ // You can save only the necessary information (such as timestamp) instead of

+ // doing this.

+ input_image_ = &input_image;

// Image in vpx_image_t format.

// Input image is const. VPX's raw image is not defined as const.

raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(input_image.buffer(kYPlane));

@@ -308,7 +468,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,

return WEBRTC_VIDEO_CODEC_ERROR;

}

timestamp_ += duration;

- return GetEncodedPartitions(input_image);

+ return WEBRTC_VIDEO_CODEC_OK;

}

void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,

@@ -317,20 +478,81 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,

assert(codec_specific != NULL);

codec_specific->codecType = kVideoCodecVP9;

CodecSpecificInfoVP9 *vp9_info = &(codec_specific->codecSpecific.VP9);

- vp9_info->pictureId = picture_id_;

- vp9_info->keyIdx = kNoKeyIdx;

- vp9_info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;

- // TODO(marpan): Temporal layers are supported in the current VP9 version,

- // but for now use 1 temporal layer encoding. Will update this when temporal

- // layer support for VP9 is added in webrtc.

- vp9_info->temporalIdx = kNoTemporalIdx;

- vp9_info->layerSync = false;

- vp9_info->tl0PicIdx = kNoTl0PicIdx;

- picture_id_ = (picture_id_ + 1) & 0x7FFF;

+ // TODO(asapersson): Set correct values.

+ vp9_info->inter_pic_predicted =

+ (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? false : true;

+ vp9_info->flexible_mode = codec_.codecSpecific.VP9.flexibleMode;

+ vp9_info->beginning_of_frame = true;

+ vp9_info->end_of_frame = true;

stefan-webrtc 2015/07/29 13:13:05 Seems like these two flags aren't really needed on…

åsapersson 2015/07/30 11:42:19 Removed these settings from CodecSpecificInfo and…

+ vp9_info->ss_data_available =

+ (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;

+ vpx_svc_layer_id_t layer_id = {0};

+ vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

stefan-webrtc 2015/07/29 13:13:05 Do you know how this works? Does it return the spa…

åsapersson 2015/07/30 11:42:19 Yes it looks so.

+ assert(num_temporal_layers_ > 0);

+ assert(num_spatial_layers_ > 0);

+ if (num_temporal_layers_ == 1) {

+ assert(layer_id.temporal_layer_id == 0);

+ vp9_info->temporal_idx = kNoTemporalIdx;

+ } else {

+ vp9_info->temporal_idx = layer_id.temporal_layer_id;

+ }

+ if (num_spatial_layers_ == 1) {

+ assert(layer_id.spatial_layer_id == 0);

+ vp9_info->spatial_idx = kNoSpatialIdx;

+ } else {

+ vp9_info->spatial_idx = layer_id.spatial_layer_id;

+ }

+ if (layer_id.spatial_layer_id != 0) {

+ vp9_info->ss_data_available = false;

+ }

stefan-webrtc 2015/07/29 13:13:05 Seems like we should DCHECK(layer_id.spatial_layer…

åsapersson 2015/07/30 11:42:19 Skipped this as discussed offline.

+ if (vp9_info->flexible_mode) {

+ vp9_info->gof_idx = kNoGofIdx;

+ } else {

+ vp9_info->gof_idx = gof_idx_++ % gof_.num_frames_in_gof;

+ }

+ // TODO(asapersson): this info has to be obtained from the encoder.

+ vp9_info->temporal_up_switch = true;

+ if (layer_id.spatial_layer_id == 0) {

+ picture_id_ = (picture_id_ + 1) & 0x7FFF;

+ // TODO(asapersson): this info has to be obtained from the encoder.

+ vp9_info->inter_layer_predicted = false;

+ } else {

+ // TODO(asapersson): this info has to be obtained from the encoder.

+ vp9_info->inter_layer_predicted = true;

+ }

+ vp9_info->picture_id = picture_id_;

+ if (!vp9_info->flexible_mode) {

+ if (layer_id.temporal_layer_id == 0 && layer_id.spatial_layer_id == 0) {

+ tl0_pic_idx_++;

+ }

+ vp9_info->tl0_pic_idx = tl0_pic_idx_;

+ }

+ if (vp9_info->ss_data_available) {

+ vp9_info->num_spatial_layers = num_spatial_layers_;

+ vp9_info->spatial_layer_resolution_present = true;

+ for (uint8_t i = 0; i < vp9_info->num_spatial_layers; i++) {

stefan-webrtc 2015/07/29 13:13:05 ++i and make i an int.

åsapersson 2015/07/30 11:42:19 Changed to size_t which num_spatial_layers is.

+ vp9_info->width[i] = codec_.width *

+ svc_internal_.svc_params.scaling_factor_num[i] /

+ svc_internal_.svc_params.scaling_factor_den[i];

+ vp9_info->height[i] = codec_.height *

+ svc_internal_.svc_params.scaling_factor_num[i] /

+ svc_internal_.svc_params.scaling_factor_den[i];

+ }

+ if (!vp9_info->flexible_mode) {

+ vp9_info->gof.CopyGofInfoVP9(gof_);

+ }

}

-int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {

- vpx_codec_iter_t iter = NULL;

+int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {

encoded_image_._length = 0;

encoded_image_._frameType = kDeltaFrame;

RTPFragmentationHeader frag_info;

@@ -339,44 +561,34 @@ int VP9EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image) {

frag_info.VerifyAndAllocateFragmentationHeader(1);

int part_idx = 0;

CodecSpecificInfo codec_specific;

- const vpx_codec_cx_pkt_t *pkt = NULL;

- while ((pkt = vpx_codec_get_cx_data(encoder_, &iter)) != NULL) {

- switch (pkt->kind) {

- case VPX_CODEC_CX_FRAME_PKT: {

- memcpy(&encoded_image_._buffer[encoded_image_._length],

- pkt->data.frame.buf,

- pkt->data.frame.sz);

- frag_info.fragmentationOffset[part_idx] = encoded_image_._length;

- frag_info.fragmentationLength[part_idx] =

- static_cast<uint32_t>(pkt->data.frame.sz);

- frag_info.fragmentationPlType[part_idx] = 0;

- frag_info.fragmentationTimeDiff[part_idx] = 0;

- encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);

- assert(encoded_image_._length <= encoded_image_._size);

- break;

- }

- default: {

- break;

- }

- // End of frame.

- if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {

- // Check if encoded frame is a key frame.

- if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {

- encoded_image_._frameType = kKeyFrame;

- }

- PopulateCodecSpecific(&codec_specific, *pkt, input_image.timestamp());

- break;

+ assert(pkt->kind == VPX_CODEC_CX_FRAME_PKT);

+ memcpy(&encoded_image_._buffer[encoded_image_._length], pkt->data.frame.buf,

+ pkt->data.frame.sz);

+ frag_info.fragmentationOffset[part_idx] = encoded_image_._length;

+ frag_info.fragmentationLength[part_idx] =

+ static_cast<uint32_t>(pkt->data.frame.sz);

+ frag_info.fragmentationPlType[part_idx] = 0;

+ frag_info.fragmentationTimeDiff[part_idx] = 0;

+ encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);

+ assert(encoded_image_._length <= encoded_image_._size);

+ // End of frame.

+ if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {

stefan-webrtc 2015/07/29 13:13:05 I doubt this check is needed. Feel free to try to…

åsapersson 2015/07/30 11:42:19 Seems ok to remove, done.

+ // Check if encoded frame is a key frame.

+ if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {

+ encoded_image_._frameType = kKeyFrame;

}

+ PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());

}

if (encoded_image_._length > 0) {

TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);

- encoded_image_._timeStamp = input_image.timestamp();

- encoded_image_.capture_time_ms_ = input_image.render_time_ms();

+ encoded_image_._timeStamp = input_image_->timestamp();

+ encoded_image_.capture_time_ms_ = input_image_->render_time_ms();

encoded_image_._encodedHeight = raw_->d_h;

encoded_image_._encodedWidth = raw_->d_w;

encoded_complete_callback_->Encoded(encoded_image_, &codec_specific,

- &frag_info);

+ &frag_info);

}

return WEBRTC_VIDEO_CODEC_OK;

}