Index: webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
diff --git a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
index 836a131648718b92ba21f60ca25f4dd55bd4b6a4..b57d4c11b8e350dacc5d0eab82c57163432d20d8 100644
--- a/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/webrtc/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -27,6 +27,7 @@
 #include "webrtc/common.h"
 #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
 #include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/modules/video_coding/codecs/vp9/screenshare_layers.h"
 #include "webrtc/system_wrappers/interface/logging.h"
 #include "webrtc/system_wrappers/interface/tick_util.h"
 
@@ -76,9 +77,12 @@ VP9EncoderImpl::VP9EncoderImpl()
       raw_(NULL),
       input_image_(NULL),
       tl0_pic_idx_(0),
-      gof_idx_(0),
+      frames_since_kf_(0),
       num_temporal_layers_(0),
-      num_spatial_layers_(0) {
+      num_spatial_layers_(0),
+      frames_encoded_(0),
+      // Use two spatial layers when screensharing with flexible mode.
+      spatial_layer_(new ScreenshareLayersVP9(2)) {
   memset(&codec_, 0, sizeof(codec_));
   uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
   srand(seed);
@@ -178,6 +182,7 @@ int VP9EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
   }
   config_->rc_target_bitrate = new_bitrate_kbit;
   codec_.maxFramerate = new_framerate;
+  spatial_layer_->ConfigureBitrate(new_bitrate_kbit, 0);
 
   if (!SetSvcRates()) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
@@ -216,6 +221,7 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   if (inst->codecSpecific.VP9.numberOfSpatialLayers > 2) {
     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
   }
+
   int retVal = Release();
   if (retVal < 0) {
     return retVal;
@@ -294,7 +300,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
 
   // TODO(asapersson): Check configuration of temporal switch up and increase
   // pattern length.
-  if (num_temporal_layers_ == 1) {
+  is_flexible_mode_ = inst->codecSpecific.VP9.flexibleMode;
+  if (is_flexible_mode_) {
+    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+    config_->ts_number_layers = num_temporal_layers_;
+    if (codec_.mode == kScreensharing)
+      spatial_layer_->ConfigureBitrate(inst->startBitrate, 0);
+  } else if (num_temporal_layers_ == 1) {
     gof_.SetGofInfoVP9(kTemporalStructureMode1);
     config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
     config_->ts_number_layers = 1;
@@ -356,7 +368,8 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
     // 1:2 scaling in each dimension.
     svc_internal_.svc_params.scaling_factor_num[i] = scaling_factor_num;
     svc_internal_.svc_params.scaling_factor_den[i] = 256;
-    scaling_factor_num /= 2;
+    if (codec_.mode != kScreensharing)
+      scaling_factor_num /= 2;
   }
 
   if (!SetSvcRates()) {
@@ -455,12 +468,31 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
   raw_->stride[VPX_PLANE_U] = input_image.stride(kUPlane);
   raw_->stride[VPX_PLANE_V] = input_image.stride(kVPlane);
 
-  int flags = 0;
+  vpx_enc_frame_flags_t flags = 0;
   bool send_keyframe = (frame_type == kKeyFrame);
   if (send_keyframe) {
     // Key frame request from caller.
     flags = VPX_EFLAG_FORCE_KF;
   }
+
+  if (is_flexible_mode_) {
+    SuperFrameRefSettings settings;
+    vpx_svc_ref_frame_config enc_layer_conf;
+    if (codec_.mode == kRealtimeVideo) {
+      // Real-time video is not yet implemented in flexible mode.
+      RTC_NOTREACHED();
+    } else {
+      settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(),
+                                                       send_keyframe);
+    }
+    enc_layer_conf = GenerateRefsAndFlags(settings);
+    vpx_svc_layer_id layer_id;
+    layer_id.temporal_layer_id = 0;
+    layer_id.spatial_layer_id = settings.start_layer;
+    vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    vpx_codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
+  }
+
   assert(codec_.maxFramerate > 0);
   uint32_t duration = 90000 / codec_.maxFramerate;
   if (vpx_codec_encode(encoder_, raw_, timestamp_, duration, flags,
@@ -486,9 +518,8 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                  !codec_.codecSpecific.VP9.flexibleMode)
                                     ? true
                                     : false;
-  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
-    gof_idx_ = 0;
-  }
+  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY)
+    frames_since_kf_ = 0;
 
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
@@ -511,17 +542,10 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
     vp9_info->ss_data_available = false;
   }
 
-  if (vp9_info->flexible_mode) {
-    vp9_info->gof_idx = kNoGofIdx;
-  } else {
-    vp9_info->gof_idx =
-        static_cast<uint8_t>(gof_idx_++ % gof_.num_frames_in_gof);
-  }
-
   // TODO(asapersson): this info has to be obtained from the encoder.
   vp9_info->temporal_up_switch = true;
 
-  if (layer_id.spatial_layer_id == 0) {
+  if (layer_id.spatial_layer_id == spatial_layer_->GetStartLayer()) {
     picture_id_ = (picture_id_ + 1) & 0x7FFF;
     // TODO(asapersson): this info has to be obtained from the encoder.
     vp9_info->inter_layer_predicted = false;
@@ -542,6 +566,20 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
   // Always populate this, so that the packetizer can properly set the marker
   // bit.
   vp9_info->num_spatial_layers = num_spatial_layers_;
+
+  vp9_info->num_ref_pics = 0;
+  if (vp9_info->flexible_mode) {
+    vp9_info->gof_idx = kNoGofIdx;
+    vp9_info->num_ref_pics = num_ref_pics_[layer_id.spatial_layer_id];
+    for (int i = 0; i < num_ref_pics_[layer_id.spatial_layer_id]; ++i) {
+      vp9_info->p_diff[i] = p_diff_[layer_id.spatial_layer_id][i];
+    }
+  } else {
+    vp9_info->gof_idx =
+        static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof);
+  }
+  ++frames_since_kf_;
+
   if (vp9_info->ss_data_available) {
     vp9_info->spatial_layer_resolution_present = true;
     for (size_t i = 0; i < vp9_info->num_spatial_layers; ++i) {
@@ -577,6 +615,13 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   frag_info.fragmentationPlType[part_idx] = 0;
   frag_info.fragmentationTimeDiff[part_idx] = 0;
   encoded_image_._length += static_cast<uint32_t>(pkt->data.frame.sz);
+
+  vpx_svc_layer_id_t layer_id = {0};
+  vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
+  if (is_flexible_mode_ && codec_.mode == kScreensharing)
+    spatial_layer_->LayerFrameEncoded(encoded_image_._length,
+                                      layer_id.spatial_layer_id);
+
   assert(encoded_image_._length <= encoded_image_._size);
 
   // End of frame.
@@ -598,6 +643,104 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
+    const SuperFrameRefSettings& settings) {
+  static const vpx_enc_frame_flags_t kAllFlags =
+      VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST |
+      VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
+  vpx_svc_ref_frame_config sf_conf;
+  if (settings.is_keyframe) {
+    // Used later on to make sure we don't make any invalid references.
+    memset(buf_upd_at_frame_, -1, sizeof(buf_upd_at_frame_));
+    for (int l = settings.start_layer; l <= settings.stop_layer; ++l) {
+      num_ref_pics_[l] = 0;
+      buf_upd_at_frame_[settings.layer[l].upd_buf] = frames_encoded_;
+      sf_conf.lst_fb_idx[l] = settings.layer[l].upd_buf;
+    }
+  } else {
+    for (int layer_idx = settings.start_layer; layer_idx <= settings.stop_layer;
+         ++layer_idx) {
+      vpx_enc_frame_flags_t layer_flags = kAllFlags;
+      num_ref_pics_[layer_idx] = 0;
+      int8_t refs[3] = {settings.layer[layer_idx].ref_buf1,
+                        settings.layer[layer_idx].ref_buf2,
+                        settings.layer[layer_idx].ref_buf3};
+
+      for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
+        if (refs[ref_idx] == -1)
+          continue;
+
+        RTC_DCHECK_GE(refs[ref_idx], 0);
+        RTC_DCHECK_LE(refs[ref_idx], 7);
+        // Easier to remove flags from all flags rather than having to
+        // build the flags from 0.
+        switch (num_ref_pics_[layer_idx]) {
+          case 0: {
+            sf_conf.lst_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_LAST;
+            break;
+          }
+          case 1: {
+            sf_conf.gld_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_GF;
+            break;
+          }
+          case 2: {
+            sf_conf.alt_fb_idx[layer_idx] = refs[ref_idx];
+            layer_flags &= ~VP8_EFLAG_NO_REF_ARF;
+            break;
+          }
+        }
+        // Make sure we don't reference a buffer that hasn't been
+        // used at all or hasn't been used since a keyframe.
+        RTC_DCHECK_NE(buf_upd_at_frame_[refs[ref_idx]], -1);
+
+        p_diff_[layer_idx][num_ref_pics_[layer_idx]] =
+            frames_encoded_ - buf_upd_at_frame_[refs[ref_idx]];
+        num_ref_pics_[layer_idx]++;
+      }
+
+      bool upd_buf_same_as_a_ref = false;
+      if (settings.layer[layer_idx].upd_buf != -1) {
+        for (unsigned int ref_idx = 0; ref_idx < kMaxVp9RefPics; ++ref_idx) {
+          if (settings.layer[layer_idx].upd_buf == refs[ref_idx]) {
+            switch (ref_idx) {
+              case 0: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_LAST;
+                break;
+              }
+              case 1: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_GF;
+                break;
+              }
+              case 2: {
+                layer_flags &= ~VP8_EFLAG_NO_UPD_ARF;
+                break;
+              }
+            }
+            upd_buf_same_as_a_ref = true;
+            break;
+          }
+        }
+        if (!upd_buf_same_as_a_ref) {
+          // If we have three references and a buffer is specified to be
+          // updated, then that buffer must be the same as one of the three
+          // references.
+          RTC_CHECK_LT(num_ref_pics_[layer_idx], kMaxVp9RefPics);
+
+          sf_conf.alt_fb_idx[layer_idx] = settings.layer[layer_idx].upd_buf;
+          layer_flags ^= VP8_EFLAG_NO_UPD_ARF;
+        }
+
+        buf_upd_at_frame_[settings.layer[layer_idx].upd_buf] = frames_encoded_;
+        sf_conf.frame_flags[layer_idx] = layer_flags;
+      }
+    }
+  }
+  frames_encoded_++;
+  return sf_conf;
+}
+
 int VP9EncoderImpl::SetChannelParameters(uint32_t packet_loss, int64_t rtt) {
   return WEBRTC_VIDEO_CODEC_OK;
 }