Chromium Code Reviews| Index: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc |
| diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc |
| index 2e9f5ae30f6ca8e21fcb4bd9c9c0a87a8bb2d9a5..8f2b612120450c237d00eb5ae572015f112e0be0 100644 |
| --- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc |
| +++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc |
| @@ -26,6 +26,7 @@ |
| #include "webrtc/common.h" |
| #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h" |
| #include "webrtc/modules/interface/module_common_types.h" |
| +#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h" |
| #include "webrtc/system_wrappers/interface/logging.h" |
| #include "webrtc/system_wrappers/interface/tick_util.h" |
| #include "webrtc/system_wrappers/interface/trace_event.h" |
| @@ -76,9 +77,11 @@ VP9EncoderImpl::VP9EncoderImpl() |
| raw_(NULL), |
| input_image_(NULL), |
| tl0_pic_idx_(0), |
| - gof_idx_(0), |
| + frames_since_kf_(0), |
| num_temporal_layers_(0), |
| - num_spatial_layers_(0) { |
| + num_spatial_layers_(0), |
| + frames_encoded_(0), |
| + spatial_layer_(new ScreenshareLayersVP9()) { |
| memset(&codec_, 0, sizeof(codec_)); |
| uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp()); |
| srand(seed); |
| @@ -178,6 +181,7 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit, |
| } |
| config_->rc_target_bitrate = new_bitrate_kbit; |
| codec_.maxFramerate = new_framerate; |
| + spatial_layer_->ConfigureBitrate(new_bitrate_kbit); |
| if (!SetSvcRates()) { |
| return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; |
| @@ -216,6 +220,7 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, |
| if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) { |
| return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; |
| } |
| + |
| int retVal = Release(); |
| if (retVal < 0) { |
| return retVal; |
| @@ -292,7 +297,15 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, |
| // TODO(asapersson): Check configuration of temporal switch up and increase |
| // pattern length. |
| - if (num_temporal_layers_ == 1) { |
| + is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode; |
| + if (is_flexible_mode_) { |
| + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; |
| + config_->ts_number_layers = num_temporal_layers_; |
| + |
| + if (codec_.mode == kScreensharing) { |
| + spatial_layer_->ConfigureBitrate(inst->startBitrate); |
| + } |
| + } else if (num_temporal_layers_ == 1) { |
| gof_.SetGofInfoVP9(kTemporalStructureMode1); |
| config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; |
| config_->ts_number_layers = 1; |
| @@ -360,7 +373,8 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { |
| // 1:2 scaling in each dimension. |
| svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num; |
| svc_internal_.svc_params.scaling_factor_den[i] = 256; |
| - scaling_factor_num /= 2; |
| + if (!is_flexible_mode_) |
|
sprang_webrtc
2015/09/15 15:41:22
This seems very specific. This only works because of an assumption about how the scaling factors line up in flexible mode, right? [comment truncated in extraction]
philipel
2015/09/16 09:35:54
Yes, this assumption is completely wrong. It should be configured explicitly rather than assumed here. [comment truncated in extraction]
|
| + scaling_factor_num /= 2; |
| } |
| if (!SetSvcRates()) { |
| @@ -460,12 +474,29 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, |
| raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane); |
| raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane); |
| - int flags = 0; |
| + vpx_enc_frame_flags_t flags = 0; |
| bool send_keyframe = (frame_type == kKeyFrame); |
| if (send_keyframe) { |
| // Key frame request from caller. |
| flags = VPX_EFLAG_FORCE_KF; |
| } |
| + |
| + if (is_flexible_mode_) { |
| + SuperFrameRefSettings settings; |
| + vpx_svc_ref_frame_config enc_layer_conf; |
| + if (codec_.mode == kRealtimeVideo) { |
| + // Real time video not yet implemented in flexible mode. |
| + CHECK(false); |
|
sprang_webrtc
2015/09/15 15:41:22
RTC_NOTREACHED();
philipel
2015/09/16 09:35:54
Done.
|
| + } else { |
| + settings = |
| + spatial_layer_->SfSettings(input_image.timestamp(), send_keyframe); |
| + } |
| + enc_layer_conf = GenerateRefsAndFlags(settings); |
| + vpx_codec_control(encoder_, VP9E_SET_FIRST_SPATIAL_LAYER, |
| + settings.start_layer); |
| + vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf); |
| + } |
| + |
| assert(codec_.maxFramerate > 0); |
| uint32_t duration = 90000 / codec_.maxFramerate; |
| if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags, |
| @@ -491,9 +522,8 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, |
| !codec_.codecSpecific.VP9.flexibleMode) |
| ? true |
| : false; |
| - if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { |
| - gof_idx_ = 0; |
| - } |
| + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) |
| + frames_since_kf_ = 0; |
| vpx_svc_layer_id_t layer_id = {0}; |
| vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); |
| @@ -516,17 +546,10 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, |
| vp9_info->ss_data_available = false; |
| } |
| - if (vp9_info->flexible_mode) { |
| - vp9_info->gof_idx = kNoGofIdx; |
| - } else { |
| - vp9_info->gof_idx = |
| - static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof); |
| - } |
| - |
| // TODO(asapersson): this info has to be obtained from the encoder. |
| vp9_info->temporal_up_switch = true; |
| - if (layer_id.spatial_layer_id == 0) { |
| + if (layer_id.spatial_layer_id == spatial_layer_->CurrentLayer()) { |
| picture_id_ = (picture_id_ + 1) & 0x7FFF; |
| // TODO(asapersson): this info has to be obtained from the encoder. |
| vp9_info->inter_layer_predicted = false; |
| @@ -547,6 +570,22 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, |
| // Always populate this, so that the packetizer can properly set the marker |
| // bit. |
| vp9_info->num_spatial_layers = num_spatial_layers_; |
| + |
| + vp9_info->num_ref_pics = 0; |
| + if (vp9_info->flexible_mode) { |
| + vp9_info->gof_idx = kNoGofIdx; |
| + if (!(pkt.data.frame.flags & VPX_FRAME_IS_KEY)) { |
| + vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id]; |
| + for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) { |
| + vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i]; |
| + } |
| + } |
| + } else { |
| + vp9_info->gof_idx = |
| + static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof); |
| + } |
| + ++frames_since_kf_; |
| + |
| if (vp9_info->ss_data_available) { |
| vp9_info->spatial_layer_resolution_present = true; |
| for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) { |
| @@ -582,6 +621,13 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { |
| frag_info.fragmentationPlType[part_idx] = 0; |
| frag_info.fragmentationTimeDiff[part_idx] = 0; |
| encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz); |
| + |
| + vpx_svc_layer_id_t layer_id = {0}; |
| + vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); |
| + if (is_flexible_mode_ && codec_.mode == kScreensharing) |
| + spatial_layer_->LayerFrameEncoded(encoded_image_._length, |
| + layer_id.spatial_layer_id); |
| + |
| assert(encoded_image_._length <= encoded_image_._size); |
| // End of frame. |
| @@ -603,6 +649,95 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { |
| return WEBRTC_VIDEO_CODEC_OK; |
| } |
| +vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags( |
| + const SuperFrameRefSettings& settings) { |
| + static const vpx_enc_frame_flags_t all_flags = |
| + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_LAST | |
| + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF; |
| + vpx_svc_ref_frame_config sf_conf; |
| + if (settings.is_keyframe) { |
| + // Used later on to make sure we don't make any invalid references. |
| + memset(buf_upd_at_frame_, -1, sizeof(buf_upd_at_frame_)); |
| + for (int l = settings.start_layer; l <= settings.stop_layer; ++l) { |
|
sprang_webrtc
2015/09/15 15:41:22
More descriptive names please! layer instead of l?
philipel
2015/09/16 09:35:53
Done.
|
| + buf_upd_at_frame_[settings.layer[l].upd_buf] = frames_encoded_; |
| + sf_conf.lst_fb_idx[l] = settings.layer[l].upd_buf; |
| + } |
| + } else { |
| + for (int l = settings.start_layer; l <= settings.stop_layer; ++l) { |
| + vpx_enc_frame_flags_t layer_flags = all_flags; |
| + num_ref_pics_[l] = 0; |
| + int8_t refs[3] = {settings.layer[l].ref_buf1, settings.layer[l].ref_buf2, |
| + settings.layer[l].ref_buf3}; |
| + |
| + for (unsigned int r = 0; r < kMaxVp9RefPics; ++r) { |
|
sprang_webrtc
2015/09/15 15:41:22
name
philipel
2015/09/16 09:35:54
Done.
|
| + if (refs[r] == -1) |
| + continue; |
| + |
| + DCHECK_GE(refs[r], 0); |
| + DCHECK_LE(refs[r], 7); |
| + switch (num_ref_pics_[l]) { |
| + case 0: { |
| + sf_conf.lst_fb_idx[l] = refs[r]; |
| + layer_flags ^= VP8_EFLAG_NO_REF_LAST; |
|
sprang_webrtc
2015/09/15 15:41:22
Not sure I follow why you xor this flag. Comment?
philipel
2015/09/16 09:35:54
If we want to reference the LAST buffer then we sh
|
| + break; |
| + } |
| + case 1: { |
| + sf_conf.gld_fb_idx[l] = refs[r]; |
| + layer_flags ^= VP8_EFLAG_NO_REF_GF; |
| + break; |
| + } |
| + case 2: { |
| + sf_conf.alt_fb_idx[l] = refs[r]; |
| + layer_flags ^= VP8_EFLAG_NO_REF_ARF; |
| + break; |
| + } |
| + } |
| + // Make sure we don't reference a buffer that hasn't been |
| + // used at all or hasn't been used since a keyframe. |
| + DCHECK_NE(buf_upd_at_frame_[refs[r]], -1); |
| + |
| + p_diff_[l][num_ref_pics_[l]] = |
| + frames_encoded_ - buf_upd_at_frame_[refs[r]]; |
| + num_ref_pics_[l]++; |
| + } |
| + |
| + if (settings.layer[l].upd_buf != -1) { |
| + for (unsigned int r = 0; r < kMaxVp9RefPics; ++r) { |
| + if (settings.layer[l].upd_buf == refs[r]) { |
| + switch (r) { |
| + case 0: { |
| + layer_flags ^= VP8_EFLAG_NO_UPD_LAST; |
| + break; |
| + } |
| + case 1: { |
| + layer_flags ^= VP8_EFLAG_NO_UPD_GF; |
| + break; |
| + } |
| + case 2: { |
| + layer_flags ^= VP8_EFLAG_NO_UPD_ARF; |
| + break; |
| + } |
| + } |
| + goto done; |
|
sprang_webrtc
2015/09/15 15:41:22
Please no goto's!
Use a descriptively named temp
philipel
2015/09/16 09:35:54
Done.
|
| + } |
| + } |
| + // If we have three references and a buffer is specified to be updated, |
| + // then that buffer must be the same as one of the three references. |
| + CHECK_LT(num_ref_pics_[l], kMaxVp9RefPics); |
| + |
| + sf_conf.alt_fb_idx[l] = settings.layer[l].upd_buf; |
| + layer_flags ^= VP8_EFLAG_NO_UPD_ARF; |
| + |
| + done: |
| + buf_upd_at_frame_[settings.layer[l].upd_buf] = frames_encoded_; |
| + sf_conf.frame_flags[l] = layer_flags; |
| + } |
| + } |
| + } |
| + frames_encoded_++; |
| + return sf_conf; |
| +} |
| + |
| // Channel feedback (packet loss, RTT) is not used by this encoder; the call |
| // succeeds unconditionally. |
| int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) { |
| return WEBRTC_VIDEO_CODEC_OK; |
| } |