webrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc - Issue 1306813009: H.264 video codec support using OpenH264/FFmpeg

Side by Side Diff: webrtc/modules/video_coding/codecs/h264/h264_encoder_impl.cc

Issue 1306813009: H.264 video codec support using OpenH264/FFmpeg (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Addressed stefan's comments Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.

	3 *

	4 * Use of this source code is governed by a BSD-style license

	5 * that can be found in the LICENSE file in the root of the source

	6 * tree. An additional intellectual property rights grant can be found

	7 * in the file PATENTS. All contributing project authors may

	8 * be found in the AUTHORS file in the root of the source tree.

	9 *

	10 */

	11

	12 #include "webrtc/modules/video_coding/codecs/h264/h264_encoder_impl.h"

	13

	14 // OpenH264

	15 #include "codec_api.h"

	16 #include "codec_app_def.h"

	17 #include "codec_def.h"

	18

	19 #include "webrtc/base/checks.h"

	20 #include "webrtc/base/logging.h"

	21 #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"

	22

	23 namespace webrtc {

	24

	25 namespace {

	26 const bool kOpenH264EncoderDetailedLogging = false;

	27 } // namespace

	28

	29 static VideoFrameType EVideoFrameType_to_VideoFrameType(

	30 EVideoFrameType type) {

	31 switch (type) {

	32 case videoFrameTypeInvalid:

	33 return kSkipFrame;

	34 case videoFrameTypeSkip:

	35 return kDeltaFrame;

	36 case videoFrameTypeIDR:

	37 return kKeyFrame;

	38 case videoFrameTypeI:

	39 case videoFrameTypeP:

	40 case videoFrameTypeIPMixed:

	41 return kDeltaFrame;

	42 default:

	43 LOG(LS_WARNING) << "Unknown EVideoFrameType: " << type;

	44 return kDeltaFrame;

	45 }

	46 }

	47 // Helper method used by H264EncoderImpl::Encode.

	48 // Copies the encoded bytes from \|info\| to \|encoded_image\| and updates the

	49 // fragmentation information of \|frag_header\|. The \|encoded_image->_buffer\| may

	50 // be deleted and reallocated if a bigger buffer is required.

	51 // After OpenH264 encoding, the encoded bytes are stored in \|info\| spread out

	52 // over a number of layers and "NAL units". Each NAL unit is a fragment starting

	53 // with the four-byte start code {0,0,0,1}. All of this data (including the

	54 // start codes) is copied to the \|encoded_image->_buffer\| and the \|frag_header\|

	55 // is updated to point to each fragment, with offsets and lengths set as to

	56 // exclude the start codes.

	57 static void RtpFragmentize(EncodedImage* encoded_image,

	58 rtc::scoped_ptr<uint8_t>* encoded_image_buffer,

	59 const VideoFrame& frame,

	60 SFrameBSInfo* info,

	61 RTPFragmentationHeader* frag_header) {

	62 // Calculate minimum buffer size required to hold encoded data.

	63 size_t required_size = 0;

	64 size_t fragments_count = 0;

	65 for (int iLayer = 0; iLayer < info->iLayerNum; ++iLayer) {

	66 const SLayerBSInfo& layerInfo = info->sLayerInfo[iLayer];

	67 for (int iNal = 0; iNal < layerInfo.iNalCount; ++iNal) {

	68 required_size += layerInfo.pNalLengthInByte[iNal];

	69 ++fragments_count;

	70 }

	71 }

	72 if (encoded_image->_size < required_size) {

	73 // Increase buffer size. Allocate enough to hold an unencoded image, this

	74 // should be more than enough to hold any encoded data of future frames of

	75 // the same size (avoiding possible future reallocation due to variations in

	76 // required size).

	77 encoded_image->_size = CalcBufferSize(

	78 VideoType::kI420, frame.width(), frame.height());

	79 if (encoded_image->_size < required_size) {

	80 // Encoded data > unencoded data, wtf? Allocate required bytes.

	81 LOG(LS_WARNING) << "Encoding produced more bytes than the original image "

	82 << "data! Original bytes: " << encoded_image->_size

	83 << ", encoded bytes: " << required_size << ".";

	84 encoded_image->_size = required_size;

	85 }

	86 encoded_image->_buffer = new uint8_t[encoded_image->_size];

	87 encoded_image_buffer->reset(encoded_image->_buffer);

	88 }

	89

	90 // Iterate layers and NAL units, note each NAL unit as a fragment and copy

	91 // the data to \|encoded_image->_buffer\|.

	92 frag_header->VerifyAndAllocateFragmentationHeader(fragments_count);

	93 size_t frag_i = 0;

	94 encoded_image->_length = 0;

	95 for (int iLayer = 0; iLayer < info->iLayerNum; ++iLayer) {

	96 const SLayerBSInfo& layerInfo = info->sLayerInfo[iLayer];

	97 // Iterate NAL units making up this layer, noting fragments.

	98 size_t iLayerLen = 0;

	99 for (int iNal = 0; iNal < layerInfo.iNalCount; ++iNal, ++frag_i) {

	100 // Expecting start code constant {0,0,0,1}.

	101 DCHECK_EQ(layerInfo.pBsBuf[iLayerLen+0], static_cast<unsigned char>(0));

	102 DCHECK_EQ(layerInfo.pBsBuf[iLayerLen+1], static_cast<unsigned char>(0));

	103 DCHECK_EQ(layerInfo.pBsBuf[iLayerLen+2], static_cast<unsigned char>(0));

	104 DCHECK_EQ(layerInfo.pBsBuf[iLayerLen+3], static_cast<unsigned char>(1));

	105 // Fragment: +4/-4 is for excluding the start code.
	stefan-webrtc 2015/10/01 08:19:30 Instead of this comment you can name the constant Instead of this comment you can name the constant 4. Or even better, create a constant array: const uint8_t kStartCode[4] = {0, 0, 0, 1}; and use it above and use sizeof(kStartCode) below. hbos 2015/10/01 12:19:45 Done. Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > Instead of this comment you can name the constant 4. Or even better, create a > constant array: > const uint8_t kStartCode[4] = {0, 0, 0, 1}; > > and use it above and use sizeof(kStartCode) below. Done.
	106 frag_header->fragmentationOffset[frag_i] =

	107 encoded_image->_length + iLayerLen + 4;

	108 frag_header->fragmentationLength[frag_i] =

	109 layerInfo.pNalLengthInByte[iNal] - 4;

	110 iLayerLen += layerInfo.pNalLengthInByte[iNal];

	111 }

	112 // Copy the entire layer's data (including start codes).

	113 memcpy(encoded_image->_buffer + encoded_image->_length,

	114 layerInfo.pBsBuf,

	115 iLayerLen * sizeof(unsigned char));

	116 encoded_image->_length += iLayerLen;

	117 }

	118 }

	119

	120 H264EncoderImpl::H264EncoderImpl()

	121 : openh264_encoder_(nullptr),

	122 encoded_image_callback_(nullptr) {

	123 }

	124

	125 H264EncoderImpl::~H264EncoderImpl() {

	126 Release();

	127 }

	128

	129 int32_t H264EncoderImpl::InitEncode(const VideoCodec* codec_settings,

	130 int32_t /number_of_cores/,

	131 size_t /max_payload_size/) {

	132 if (!codec_settings \|\|

	133 codec_settings->codecType != VideoCodecType::kVideoCodecH264) {

	134 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	135 }

	136 if (codec_settings->maxFramerate == 0)

	137 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	138 if (codec_settings->width < 1 \|\| codec_settings->height < 1)

	139 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	140

	141 int release_ret = Release();

	142 if (release_ret != WEBRTC_VIDEO_CODEC_OK)

	143 return release_ret;

	144 DCHECK(!openh264_encoder_);

	145

	146 // Create encoder.

	147 if (WelsCreateSVCEncoder(&openh264_encoder_) != 0) {

	148 // Failed to create encoder.

	149 LOG(LS_ERROR) << "Failed to create OpenH264 encoder";

	150 DCHECK(!openh264_encoder_);

	151 return WEBRTC_VIDEO_CODEC_ERROR;

	152 }

	153 DCHECK(openh264_encoder_);

	154 if (kOpenH264EncoderDetailedLogging) {

	155 int trace_level = WELS_LOG_DETAIL;

	156 openh264_encoder_->SetOption(ENCODER_OPTION_TRACE_LEVEL,

	157 &trace_level);

	158 }

	159 // else WELS_LOG_DEFAULT is used by default.

	160

	161 codec_settings_ = *codec_settings;

	162 if (codec_settings_.targetBitrate == 0)

	163 codec_settings_.targetBitrate = codec_settings_.startBitrate;

	164

	165 // Initialization parameters.

	166 // There are two ways to initialize. There is SEncParamBase (cleared with

	167 // memset(&p, 0, sizeof(SEncParamBase)) used in Initialize, and SEncParamExt

	168 // which is a superset of SEncParamBase (cleared with GetDefaultParams) used

	169 // in InitializeExt.

	170 SEncParamExt init_params;

	171 openh264_encoder_->GetDefaultParams(&init_params);

	172 if (codec_settings_.mode == kRealtimeVideo) {

	173 init_params.iUsageType = CAMERA_VIDEO_REAL_TIME;

	174 } else if (codec_settings_.mode == kScreensharing) {

	175 init_params.iUsageType = SCREEN_CONTENT_REAL_TIME;

	176 } else {

	177 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	178 }

	179 init_params.iPicWidth = codec_settings_.width;

	180 init_params.iPicHeight = codec_settings_.height;

	181 // \|iTargetBitrate\| is in bit/s, \|targetBitrate\| is in kbit/s.

	182 init_params.iTargetBitrate = codec_settings_.targetBitrate * 1000;

	183 // Rate Control mode

	184 // TODO(hbos): Switch to RC_TIMESTAMP_MODE? VideoProcessorImpl::ProcessFrame

	185 // need to be updated to use proper time stamps or else it will not work and

	186 // VideoProcessorIntegrationTest.Process0PercentPacketLossH264 will fail.

	187 init_params.iRCMode = RC_BITRATE_MODE;

	188 init_params.fMaxFrameRate = static_cast<float>(codec_settings_.maxFramerate);

	189

	190 // The following parameters are extension parameters (they're in SEncParamExt,

	191 // not in SEncParamBase).

	192 init_params.bEnableFrameSkip =

	193 codec_settings_.codecSpecific.H264.frameDroppingOn;

	194 // \|uiIntraPeriod\| - multiple of GOP size

	195 // \|keyFrameInterval\| - ? number of frames? multiple of GOP size?
	stefan-webrtc 2015/10/01 08:19:30 keyFrameInterval is in frames. I'm not sure what m keyFrameInterval is in frames. I'm not sure what multiple of GOP size means when you have an IPPP structure, where the GOP can be very large... Maybe you can try a few settings and see how it behaves by logging when a key frame is generated? hbos 2015/10/01 12:19:45 I did some logging and for me setting uiIntraPerio Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > keyFrameInterval is in frames. I'm not sure what multiple of GOP size means when > you have an IPPP structure, where the GOP can be very large... Maybe you can try > a few settings and see how it behaves by logging when a key frame is generated? I did some logging and for me setting uiIntraPeriod to x meant every x-th frame was a key frame (= videoFrameTypeIDR). If I made the images change drastically (like putting my hand in front of the camera) I could get more key frames in-between. All other frames was videoFrameTypeP. So it works for now at least with the default video_loopback settings/frames, but I don't know what affects the GOP size or if there is a way to check what it is...
	196 init_params.uiIntraPeriod =

	197 codec_settings_.codecSpecific.H264.keyFrameInterval;

	198 init_params.uiMaxNalSize = 0;

	199 // Threading model: use auto.

	200 // 0: auto (dynamic imp. internal encoder)

	201 // 1: single thread (default value)

	202 // >1: number of threads

	203 init_params.iMultipleThreadIdc = 0;

	204 // The base spatial layer 0 is the only one we use.

	205 init_params.sSpatialLayers[0].iVideoWidth = init_params.iPicWidth;

	206 init_params.sSpatialLayers[0].iVideoHeight = init_params.iPicHeight;

	207 init_params.sSpatialLayers[0].fFrameRate = init_params.fMaxFrameRate;

	208 init_params.sSpatialLayers[0].iSpatialBitrate = init_params.iTargetBitrate;

	209 init_params.sSpatialLayers[0].iMaxSpatialBitrate = init_params.iMaxBitrate;

	210 // Slice num according to number of threads.

	211 init_params.sSpatialLayers[0].sSliceCfg.uiSliceMode = SM_AUTO_SLICE;

	212

	213 // Initialize.

	214 if (openh264_encoder_->InitializeExt(&init_params) != 0) {

	215 LOG(LS_ERROR) << "Failed to initialize OpenH264 encoder";

	216 Release();

	217 return WEBRTC_VIDEO_CODEC_ERROR;

	218 }

	219 int video_format = EVideoFormatType::videoFormatI420;

	220 openh264_encoder_->SetOption(ENCODER_OPTION_DATAFORMAT,

	221 &video_format);

	222

	223 // Initialize encoded image. Default buffer size: size of unencoded data.

	224 encoded_image_._size = CalcBufferSize(

	225 VideoType::kI420, codec_settings_.width, codec_settings_.height);

	226 encoded_image_._buffer = new uint8_t[encoded_image_._size];

	227 encoded_image_buffer_.reset(encoded_image_._buffer);

	228 encoded_image_._completeFrame = true;

	229 encoded_image_._encodedWidth = 0;

	230 encoded_image_._encodedHeight = 0;

	231 encoded_image_._length = 0;

	232 return WEBRTC_VIDEO_CODEC_OK;

	233 }

	234

	235 int32_t H264EncoderImpl::Release() {

	236 if (openh264_encoder_) {

	237 int uninit_ret = openh264_encoder_->Uninitialize();

	238 if (uninit_ret != 0) {

	239 LOG(LS_WARNING) << "OpenH264 encoder's Uninitialize() returned "

	240 << "unsuccessful: " << uninit_ret;

	241 }

	242 WelsDestroySVCEncoder(openh264_encoder_);

	243 openh264_encoder_ = nullptr;

	244 }

	245 if (encoded_image_._buffer != nullptr) {

	246 encoded_image_._buffer = nullptr;

	247 encoded_image_buffer_.reset(nullptr);
	stefan-webrtc 2015/10/01 08:19:30 Don't think you have to pass nullptr here. Don't think you have to pass nullptr here. hbos 2015/10/01 12:19:45 Done. Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > Don't think you have to pass nullptr here. Done.
	248 }

	249 return WEBRTC_VIDEO_CODEC_OK;

	250 }

	251

	252 int32_t H264EncoderImpl::RegisterEncodeCompleteCallback(

	253 EncodedImageCallback* callback) {

	254 encoded_image_callback_ = callback;

	255 return WEBRTC_VIDEO_CODEC_OK;

	256 }

	257

	258 int32_t H264EncoderImpl::SetRates(uint32_t bitrate, uint32_t framerate) {

	259 if (bitrate <= 0 \|\| framerate <= 0) {

	260 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	261 }

	262 codec_settings_.targetBitrate = bitrate;

	263 codec_settings_.maxFramerate = framerate;

	264

	265 SBitrateInfo target_bitrate;

	266 memset(&target_bitrate, 0, sizeof(SBitrateInfo));

	267 target_bitrate.iLayer = SPATIAL_LAYER_ALL,

	268 target_bitrate.iBitrate = codec_settings_.targetBitrate * 1000;

	269 openh264_encoder_->SetOption(ENCODER_OPTION_BITRATE,

	270 &target_bitrate);

	271 float max_framerate = static_cast<float>(codec_settings_.maxFramerate);

	272 openh264_encoder_->SetOption(ENCODER_OPTION_FRAME_RATE,

	273 &max_framerate);

	274 return WEBRTC_VIDEO_CODEC_OK;

	275 }

	276

	277 int32_t H264EncoderImpl::Encode(

	278 const VideoFrame& frame, const CodecSpecificInfo* codec_specific_info,

	279 const std::vector<VideoFrameType>* frame_types) {

	280 if (!IsInitialized())

	281 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

	282 if (frame.IsZeroSize())

	283 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;

	284 if (!encoded_image_callback_) {

	285 LOG(LS_WARNING) << "InitEncode() has been called, but a callback function "

	286 << "has not been set with RegisterEncodeCompleteCallback()";

	287 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

	288 }

	289 if (frame.width() != codec_settings_.width \|\|

	290 frame.height() != codec_settings_.height) {

	291 LOG(LS_WARNING) << "Encoder initialized for " << codec_settings_.width

	292 << "x" << codec_settings_.height << " but trying to encode "

	293 << frame.width() << "x" << frame.height() << " frame.";

	294 return WEBRTC_VIDEO_CODEC_ERR_SIZE;

	295 }

	296

	297 bool force_key_frame = false;

	298 if (frame_types != nullptr) {

	299 // Skip frame?

	300 if (std::find(frame_types->begin(), frame_types->end(),
	stefan-webrtc 2015/10/01 08:19:30 Since we only encode a single stream you might as Since we only encode a single stream you might as well DCHECK(frame_types.size() == 1) and then use frame_types[0]. hbos 2015/10/01 12:19:45 Done. Did not realize \|frame_types\| one type per s Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > Since we only encode a single stream you might as well DCHECK(frame_types.size() > == 1) > and then use frame_types[0]. Done. Did not realize \|frame_types\| one type per stream.
	301 kSkipFrame) != frame_types->end()) {

	302 return WEBRTC_VIDEO_CODEC_OK;

	303 }

	304 // Force key frame?

	305 force_key_frame = std::find(frame_types->begin(), frame_types->end(),

	306 kKeyFrame) != frame_types->end();

	307 }

	308 if (force_key_frame) {

	309 // Only need to call ForceIntraFrame when true. API doc says

	310 // ForceIntraFrame(false) does nothing but really if you call it for every

	311 // frame it introduces massive delays and lag in the video stream.
	stefan-webrtc 2015/10/01 08:19:30 What the... :O What the... :O hbos 2015/10/01 12:19:45 Yes :) and then it crashes after a few seconds. Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > What the... :O Yes :) and then it crashes after a few seconds.
	312 openh264_encoder_->ForceIntraFrame(true);

	313 }

	314

	315 // EncodeFrame input.

	316 SSourcePicture picture;

	317 memset(&picture, 0, sizeof(SSourcePicture));

	318 picture.iPicWidth = frame.width();

	319 picture.iPicHeight = frame.height();

	320 picture.iColorFormat = EVideoFormatType::videoFormatI420;

	321 // 90 kHz -> milliseconds (1 kHz)

	322 picture.uiTimeStamp = frame.timestamp() / 90;
	stefan-webrtc 2015/10/01 08:19:30 You have to make you handle a wraparound in frame. You have to make you handle a wraparound in frame.timestamp() since it's 90 kHz and uint32_t. An option might be to use ntp_time_ms() instead? hbos 2015/10/01 12:19:45 Done (using ntp_time_ms instead). Show quoted text On 2015/10/01 08:19:30, stefan-webrtc (holmer) wrote: > You have to make you handle a wraparound in frame.timestamp() since it's 90 kHz > and uint32_t. > > An option might be to use ntp_time_ms() instead? Done (using ntp_time_ms instead).
	323 picture.iStride[0] = frame.stride(kYPlane);

	324 picture.iStride[1] = frame.stride(kUPlane);

	325 picture.iStride[2] = frame.stride(kVPlane);

	326 picture.pData[0] = const_cast<uint8_t*>(frame.buffer(kYPlane));

	327 picture.pData[1] = const_cast<uint8_t*>(frame.buffer(kUPlane));

	328 picture.pData[2] = const_cast<uint8_t*>(frame.buffer(kVPlane));

	329

	330 // EncodeFrame output.

	331 SFrameBSInfo info;

	332 memset(&info, 0, sizeof(SFrameBSInfo));

	333

	334 // Encode!

	335 int enc_ret = openh264_encoder_->EncodeFrame(&picture, &info);

	336 if (enc_ret != 0) {

	337 LOG(LS_ERROR) << "OpenH264 frame encoding failed, EncodeFrame returned "

	338 << enc_ret << ".";

	339 return WEBRTC_VIDEO_CODEC_ERROR;

	340 }

	341

	342 encoded_image_._encodedWidth = frame.width();

	343 encoded_image_._encodedHeight = frame.height();

	344 encoded_image_._timeStamp = frame.timestamp();

	345 encoded_image_.ntp_time_ms_ = frame.ntp_time_ms();

	346 encoded_image_.capture_time_ms_ = frame.render_time_ms();

	347 encoded_image_._frameType = EVideoFrameType_to_VideoFrameType(

	348 info.eFrameType);

	349

	350 // Split encoded image up into fragments. This also updates \|encoded_image_\|.

	351 RTPFragmentationHeader frag_header;

	352 RtpFragmentize(&encoded_image_, &encoded_image_buffer_,

	353 frame, &info, &frag_header);

	354

	355 // Encoder can skip frames to save bandwidth in which case

	356 // \|encoded_image_._length\| == 0.

	357 if (encoded_image_._length > 0) {

	358 // Deliver encoded image.

	359 encoded_image_callback_->Encoded(encoded_image_, codec_specific_info,

	360 &frag_header);

	361 }

	362 return WEBRTC_VIDEO_CODEC_OK;

	363 }

	364

	365 bool H264EncoderImpl::IsInitialized() {

	366 return openh264_encoder_ != nullptr;

	367 }

	368

	369 int32_t H264EncoderImpl::SetChannelParameters(

	370 uint32_t packet_loss, int64_t rtt) {

	371 return WEBRTC_VIDEO_CODEC_OK;

	372 }

	373

	374 int32_t H264EncoderImpl::SetPeriodicKeyFrames(bool enable) {

	375 return WEBRTC_VIDEO_CODEC_OK;

	376 }

	377

	378 int32_t H264EncoderImpl::CodecConfigParameters(uint8_t* buffer, int32_t size) {

	379 return WEBRTC_VIDEO_CODEC_OK;

	380 }

	381

	382 void H264EncoderImpl::OnDroppedFrame() {

	383 }

	384

	385 } // namespace webrtc

OLD	NEW