Chromium Code Reviews

Side by Side Diff: webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc

Issue 1306813009: H.264 video codec support using OpenH264/FFmpeg (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Addressed nits (created 4 years, 11 months ago)
1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 *
10 */
11
12 #include "webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h"
13
14 #include <algorithm>
15
16 extern "C" {
17 #include "third_party/ffmpeg/libavcodec/avcodec.h"
18 #include "third_party/ffmpeg/libavformat/avformat.h"
19 #include "third_party/ffmpeg/libavutil/imgutils.h"
20 } // extern "C"
21
22 #include "webrtc/base/checks.h"
23 #include "webrtc/base/criticalsection.h"
24 #include "webrtc/base/logging.h"
25
26 namespace webrtc {
27
28 namespace {
29
30 static const AVPixelFormat kPixelFormat = AV_PIX_FMT_YUV420P;
31 static const size_t kYPlaneIndex = 0;
32 static const size_t kUPlaneIndex = 1;
33 static const size_t kVPlaneIndex = 2;
34
35 #if !defined(WEBRTC_CHROMIUM_BUILD)
36
37 static bool ffmpeg_initialized = false;
38
39 // Called by FFmpeg to do mutex operations if initialized using InitializeFFmpeg.
palmer 2016/01/11 22:27:09 Nit: use |...| to signal identifiers. For example, |InitializeFFmpeg|.
hbos 2016/01/12 13:56:27 Done.
40 static int LockManagerOperation(void** lock, AVLockOp op)
palmer 2016/01/11 22:27:09 You don't need to declare things in the anonymous namespace |static|; the unnamed namespace already gives them internal linkage.
hbos 2016/01/12 13:56:27 Done. I think anonymous namespace is preferred to |static| by current style guidance.
41 EXCLUSIVE_LOCK_FUNCTION() UNLOCK_FUNCTION() {
42 switch (op) {
43 case AV_LOCK_CREATE:
44 *lock = new rtc::CriticalSection();
45 return 0;
46 case AV_LOCK_OBTAIN:
47 static_cast<rtc::CriticalSection*>(*lock)->Enter();
48 return 0;
49 case AV_LOCK_RELEASE:
50 static_cast<rtc::CriticalSection*>(*lock)->Leave();
51 return 0;
52 case AV_LOCK_DESTROY:
53 delete static_cast<rtc::CriticalSection*>(*lock);
54 *lock = nullptr;
55 return 0;
56 }
57 return 1;
palmer 2016/01/11 22:27:09 Since the return values are just 0 and 1, should the return type be bool?
hbos 2016/01/12 13:56:26 Return type: FFmpeg defines it to return int, C style.
58 }
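For reference on the style point in the thread above: a function defined in an unnamed namespace already has internal linkage, so marking it static as well is redundant. A minimal sketch, with hypothetical helper names that are not part of this CL:

namespace {
int HelperInUnnamedNamespace() { return 0; }  // internal linkage, implicit
}  // namespace

static int HelperMarkedStatic() { return 0; }  // internal linkage, explicit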
59
60 // TODO(hbos): Assumed to be called on a single thread. Should DCHECK that
palmer 2016/01/11 22:27:09 Link to a bug in all TODOs.
hbos 2016/01/12 13:56:26 Done.
61 // InitializeFFmpeg is only called on one thread or make it thread safe.
62 static bool InitializeFFmpeg() {
63 if (!ffmpeg_initialized) {
64 if (av_lockmgr_register(LockManagerOperation) < 0) {
65 LOG(LS_ERROR) << "av_lockmgr_register failed.";
palmer 2016/01/11 22:27:09 Should this ever happen? Should it be CHECK or NOTREACHED instead?
hbos 2016/01/12 13:56:27 Done.
66 return false;
67 }
68 av_register_all();
69 ffmpeg_initialized = true;
70 }
71 return true;
72 }
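The TODO above assumes InitializeFFmpeg is only called on one thread. One hedged way to make the one-time setup thread safe is std::call_once; the sketch below reuses LockManagerOperation from this file, and |InitializeFFmpegOnce| is a hypothetical name, not part of this CL:

#include <mutex>  // std::call_once, std::once_flag

static bool InitializeFFmpegOnce() {
  static std::once_flag flag;
  static bool succeeded = false;
  std::call_once(flag, [] {
    if (av_lockmgr_register(LockManagerOperation) < 0) {
      LOG(LS_ERROR) << "av_lockmgr_register failed.";
      return;
    }
    av_register_all();
    succeeded = true;
  });
  return succeeded;
}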
73
74 #endif // !defined(WEBRTC_CHROMIUM_BUILD)
75
76 static int NumberOfThreads(int width, int height, int number_of_cores) {
mflodman 2016/01/12 10:31:25 See my comment below about threads.
hbos 2016/01/12 13:56:26 Acknowledged.
77 if (width * height >= 1920 * 1080 && number_of_cores > 8) {
78 return 8; // 8 threads for 1080p on high perf machines.
79 } else if (width * height > 1280 * 960 && number_of_cores >= 6) {
80 return 3; // 3 threads for 1080p.
81 } else if (width * height > 640 * 480 && number_of_cores >= 3) {
82 return 2; // 2 threads for qHD/HD.
83 } else {
84 return 1; // 1 thread for VGA or less.
85 }
86 }
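Following the thread above ("OK. 1 it is."), the simplification the review converged on would look roughly like this; a sketch shown only for context, not the code in this patch set:

static int NumberOfThreads(int /*width*/, int /*height*/, int /*number_of_cores*/) {
  // Single-threaded decoding for now; revisit once multi-threaded decode has
  // been measured.
  return 1;
}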
87
88 // Called by FFmpeg when it is done with a frame buffer, see AVGetBuffer2.
89 static void AVFreeBuffer2(void* opaque, uint8_t* data) {
90 VideoFrame* video_frame = static_cast<VideoFrame*>(opaque);
palmer 2016/01/11 22:27:09 This looks dangerous. What guarantee is there that |opaque| actually points to a VideoFrame?
hbos 2016/01/12 13:56:27 The only "guarantee" is that for each VideoFrame we hand to FFmpeg as |opaque| in AVGetBuffer2, that same pointer is passed back to this callback.
91 delete video_frame;
92 }
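A small standalone sketch of the contract the cast above relies on, assuming FFmpeg's libavutil is available: av_buffer_create() hands the |opaque| pointer back, unchanged, to the free callback when the last reference is dropped. |Payload| and |FreePayload| are hypothetical names:

extern "C" {
#include "third_party/ffmpeg/libavutil/buffer.h"
}
#include <cassert>
#include <cstdint>
#include <cstdlib>

struct Payload {
  int tag = 42;
};

static void FreePayload(void* opaque, uint8_t* data) {
  // FFmpeg passes back exactly the |opaque| given to av_buffer_create below.
  assert(static_cast<Payload*>(opaque)->tag == 42);
  delete static_cast<Payload*>(opaque);
  free(data);
}

int main() {
  Payload* payload = new Payload();
  uint8_t* data = static_cast<uint8_t*>(calloc(16, 1));
  AVBufferRef* buffer = av_buffer_create(data, 16, &FreePayload, payload, 0);
  av_buffer_unref(&buffer);  // Last reference dropped; FreePayload runs.
  return 0;
}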
93
94 // Called by FFmpeg when it needs a frame buffer to store decoded frames in.
95 // The VideoFrames returned by FFmpeg at Decode originate from here. They are
96 // reference counted and freed by FFmpeg using AVFreeBuffer2.
97 // TODO(hbos): Use a frame pool for better performance instead of create/free.
98 // Could be owned by decoder, static_cast<H264DecoderImpl*>(context->opaque).
99 static int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) {
100 RTC_CHECK_EQ(context->pix_fmt, kPixelFormat); // Same as in InitDecode.
101 // width/height and coded_width/coded_height can be different due to cropping
102 // or |lowres|.
103 int width = std::max(context->width, context->coded_width);
104 int height = std::max(context->height, context->coded_height);
105 // See |lowres|, if used the decoder scales the image by 1/2^(lowres). This
106 // has implications on which resolutions are valid, but we don't use it.
107 RTC_CHECK_EQ(context->lowres, 0);
108
109 RTC_CHECK_GE(width, 0);
110 RTC_CHECK_GE(height, 0);
111 int ret = av_image_check_size(width, height, 0, nullptr);
112 if (ret < 0) {
113 LOG(LS_ERROR) << "Invalid picture size " << width << "x" << height;
114 return ret;
115 }
116
117 // The video frame is stored in |video_frame|. |av_frame| is FFmpeg's version
118 // of a video frame and will be set up to reference |video_frame|'s buffers.
119 VideoFrame* video_frame = new VideoFrame();
120 int stride_y = width;
121 int stride_uv = (width + 1) / 2;
122 RTC_CHECK_EQ(0, video_frame->CreateEmptyFrame(
123 width, height, stride_y, stride_uv, stride_uv));
124 size_t total_size = video_frame->allocated_size(kYPlane) +
palmer 2016/01/11 22:27:09 Could this arithmetic overflow?
hbos 2016/01/12 13:56:26 No, av_image_check_size (called above) makes sure width and height are small enough that this cannot overflow.
125 video_frame->allocated_size(kUPlane) +
126 video_frame->allocated_size(kVPlane);
127 RTC_DCHECK_EQ(total_size, static_cast<size_t>(stride_y * height +
palmer 2016/01/11 22:27:09 And this arithmetic. Also, casting the result to size_t seems unnecessary.
hbos 2016/01/12 13:56:26 See previous comment. Switched to int instead of size_t.
128 (stride_uv + stride_uv) * ((height + 1) / 2)));
129 // FFmpeg note: "Each data plane must be aligned to the maximum required by
130 // the target CPU." See get_buffer2.
131 // TODO(hbos): Memory alignment on a per-plane basis. CreateEmptyFrame only
132 // guarantees that the buffer of all planes is memory aligned, not each
133 // individual plane. Or does "data plane" here refer to one data[] entry or
134 // one allocation?
135
136 // FFmpeg expects the initial allocation to be zero-initialized according to
137 // http://crbug.com/390941.
138 // Using a single |av_frame->buf| - YUV is required to be a contiguous blob of
139 // memory. We can zero-initialize with one memset operation for all planes.
140 RTC_DCHECK_EQ(video_frame->buffer(kUPlane),
141 video_frame->buffer(kYPlane) + video_frame->allocated_size(kYPlane));
142 RTC_DCHECK_EQ(video_frame->buffer(kVPlane),
143 video_frame->buffer(kUPlane) + video_frame->allocated_size(kUPlane));
144 memset(video_frame->buffer(kYPlane), 0, total_size);
145
146 RTC_DCHECK_EQ(av_frame->width, width);
147 RTC_DCHECK_EQ(av_frame->height, height);
148 av_frame->format = context->pix_fmt;
149 av_frame->reordered_opaque = context->reordered_opaque;
150
151 // Set |av_frame| members as required by FFmpeg.
152 av_frame->data[kYPlaneIndex] = video_frame->buffer(kYPlane);
153 av_frame->linesize[kYPlaneIndex] = video_frame->stride(kYPlane);
154 av_frame->data[kUPlaneIndex] = video_frame->buffer(kUPlane);
155 av_frame->linesize[kUPlaneIndex] = video_frame->stride(kUPlane);
156 av_frame->data[kVPlaneIndex] = video_frame->buffer(kVPlane);
157 av_frame->linesize[kVPlaneIndex] = video_frame->stride(kVPlane);
158 RTC_DCHECK_EQ(av_frame->extended_data, av_frame->data);
159
160 av_frame->buf[0] = av_buffer_create(av_frame->data[kYPlaneIndex],
161 total_size,
162 AVFreeBuffer2,
163 static_cast<void*>(video_frame),
164 0);
165 RTC_CHECK(av_frame->buf[0]);
166 return 0;
167 }
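A worked example of the size arithmetic asserted above for I420: the Y plane is stride_y * height and each chroma plane is stride_uv * ((height + 1) / 2), so for 1920x1080 the total is 1920*1080 + 2*960*540 = 3110400 bytes, well within int range once av_image_check_size has bounded the dimensions:

#include <cstdio>

int main() {
  const int width = 1920;
  const int height = 1080;
  const int stride_y = width;
  const int stride_uv = (width + 1) / 2;
  const int chroma_height = (height + 1) / 2;
  // Same formula as the RTC_DCHECK_EQ above: Y plane plus two chroma planes.
  const int total_size = stride_y * height + 2 * stride_uv * chroma_height;
  std::printf("I420 %dx%d: %d bytes\n", width, height, total_size);  // 3110400
  return 0;
}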
168
169 } // namespace
170
171 H264DecoderImpl::H264DecoderImpl()
172 : decoded_image_callback_(nullptr) {
173 }
174
175 H264DecoderImpl::~H264DecoderImpl() {
176 Release();
177 }
178
179 int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings,
180 int32_t number_of_cores) {
181 if (codec_settings &&
182 codec_settings->codecType != kVideoCodecH264) {
183 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
184 }
185
186 // In Chromium, FFmpeg will be initialized outside of WebRTC and we should not
187 // attempt to do so ourselves or it will be initialized twice.
188 // TODO(hbos): Put behind a different flag in case a non-Chromium project wants
189 // to initialize externally.
190 #if !defined(WEBRTC_CHROMIUM_BUILD)
191 // Make sure FFmpeg has been initialized.
192 InitializeFFmpeg();
193 #endif
194
195 // Release necessary in case of re-initializing.
196 int32_t ret = Release();
197 if (ret != WEBRTC_VIDEO_CODEC_OK)
198 return ret;
199 RTC_DCHECK(!av_context_);
200
201 // Initialize AVCodecContext.
202 av_context_.reset(avcodec_alloc_context3(nullptr));
203
204 av_context_->codec_type = AVMEDIA_TYPE_VIDEO;
205 av_context_->codec_id = AV_CODEC_ID_H264;
206 if (codec_settings) {
207 av_context_->coded_width = codec_settings->width;
208 av_context_->coded_height = codec_settings->height;
209 }
210 av_context_->pix_fmt = kPixelFormat;
211 av_context_->extradata = nullptr;
212 av_context_->extradata_size = 0;
213
214 av_context_->thread_count = NumberOfThreads(av_context_->coded_width,
mflodman 2016/01/12 10:31:24 This seems like a lot of threads for decoding, for now I suggest we stick to one thread.
hbos 2016/01/12 13:56:26 OK. 1 it is.
215 av_context_->coded_height,
216 number_of_cores);
217 av_context_->thread_type = FF_THREAD_SLICE;
218
219 // FFmpeg will get video buffers from our AVGetBuffer2, memory managed by us.
220 av_context_->get_buffer2 = AVGetBuffer2;
221 // get_buffer2 is called with the context, where |opaque| can be used to get a
222 // pointer to |this|.
223 av_context_->opaque = this;
224 // Use ref counted frames (av_frame_unref).
225 av_context_->refcounted_frames = 1; // true
226
227 AVCodec* codec = avcodec_find_decoder(av_context_->codec_id);
228 if (!codec) {
229 // This is an indication that FFmpeg has not been initialized or it has not
230 // been compiled/initialized with the correct set of codecs.
231 LOG(LS_ERROR) << "FFmpeg H.264 decoder not found.";
232 Release();
233 return WEBRTC_VIDEO_CODEC_ERROR;
234 }
235 int res = avcodec_open2(av_context_.get(), codec, nullptr);
236 if (res < 0) {
237 LOG(LS_ERROR) << "avcodec_open2 error: " << res;
238 Release();
239 return WEBRTC_VIDEO_CODEC_ERROR;
240 }
241
242 av_frame_.reset(av_frame_alloc());
243 return WEBRTC_VIDEO_CODEC_OK;
244 }
245
246 int32_t H264DecoderImpl::Release() {
247 av_context_.reset();
248 av_frame_.reset();
249 return WEBRTC_VIDEO_CODEC_OK;
250 }
251
252 int32_t H264DecoderImpl::Reset() {
253 if (!IsInitialized())
254 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
255 InitDecode(nullptr, 1);
256 return WEBRTC_VIDEO_CODEC_OK;
257 }
258
259 int32_t H264DecoderImpl::RegisterDecodeCompleteCallback(
260 DecodedImageCallback* callback) {
261 decoded_image_callback_ = callback;
262 return WEBRTC_VIDEO_CODEC_OK;
263 }
264
265 int32_t H264DecoderImpl::Decode(const EncodedImage& input_image,
266 bool /*missing_frames*/,
267 const RTPFragmentationHeader* /*fragmentation*/,
268 const CodecSpecificInfo* codec_specific_info,
269 int64_t /*render_time_ms*/) {
270 if (!IsInitialized())
271 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
272 if (!decoded_image_callback_) {
273 LOG(LS_WARNING) << "InitDecode() has been called, but a callback function "
274 "has not been set with RegisterDecodeCompleteCallback()";
275 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
276 }
277 if (!input_image._buffer || !input_image._length)
278 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
279 if (codec_specific_info &&
280 codec_specific_info->codecType != kVideoCodecH264) {
281 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
282 }
283
284 AVPacket packet;
285 av_init_packet(&packet);
286 // TODO(hbos): "The input buffer must be AV_INPUT_BUFFER_PADDING_SIZE larger
287 // than the actual read bytes because some optimized bitstream readers read 32
288 // or 64 bits at once and could read over the end." See avcodec_decode_video2.
289 // - Is this an issue? Do we have to make sure EncodedImage is allocated with
palmer 2016/01/11 22:27:09 Yes, this is an issue. Round the allocation size up.
hbos 2016/01/12 13:56:27 Will address in follow-up CL. There is a bug created for it: https://bugs.chromium.org/p/webrtc/issues/detail?id=5424.
290 // additional bytes or do we have to do an otherwise unnecessary copy? Might
291 // only be a problem with non-mul-32 frame widths?
292 // ("If the first 23 bits of the additional bytes are not 0, then damaged MPEG
293 // bitstreams could cause overread and segfault.")
294 // See issue: https://bugs.chromium.org/p/webrtc/issues/detail?id=5424
295 packet.data = input_image._buffer;
296 packet.size = input_image._length;
297 av_context_->reordered_opaque = input_image.ntp_time_ms_ * 1000; // ms -> μs
298
299 int frame_decoded = 0;
300 int result = avcodec_decode_video2(av_context_.get(),
301 av_frame_.get(),
302 &frame_decoded,
303 &packet);
304 if (result < 0) {
305 LOG(LS_ERROR) << "avcodec_decode_video2 error: " << result;
306 return WEBRTC_VIDEO_CODEC_ERROR;
307 }
308 // |result| is the number of bytes used, which should be all of them.
309 if (result != packet.size) {
310 LOG(LS_ERROR) << "avcodec_decode_video2 consumed " << result << " bytes "
311 "when " << packet.size << " bytes were expected.";
312 return WEBRTC_VIDEO_CODEC_ERROR;
313 }
314
315 if (!frame_decoded) {
316 LOG(LS_WARNING) << "avcodec_decode_video2 successful but no frame was "
317 "decoded.";
318 return WEBRTC_VIDEO_CODEC_OK;
319 }
320
321 // Obtain the |video_frame| containing the decoded image.
322 VideoFrame* video_frame = static_cast<VideoFrame*>(
323 av_buffer_get_opaque(av_frame_->buf[0]));
324 RTC_DCHECK(video_frame);
325 RTC_CHECK_EQ(av_frame_->data[kYPlane], video_frame->buffer(kYPlane));
326 RTC_CHECK_EQ(av_frame_->data[kUPlane], video_frame->buffer(kUPlane));
327 RTC_CHECK_EQ(av_frame_->data[kVPlane], video_frame->buffer(kVPlane));
328 video_frame->set_timestamp(input_image._timeStamp);
329
330 // Return decoded frame.
331 int32_t ret = decoded_image_callback_->Decoded(*video_frame);
332 // Stop referencing it, possibly freeing |video_frame|.
333 av_frame_unref(av_frame_.get());
334 video_frame = nullptr;
335
336 if (ret) {
337 LOG(LS_WARNING) << "DecodedImageCallback::Decoded returned " << ret;
338 return ret;
339 }
340 return WEBRTC_VIDEO_CODEC_OK;
341 }
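A minimal sketch (deferred to the bug linked in the TODO above, not part of this CL) of the padding fix the review asks for: copy the encoded bytes into a buffer AV_INPUT_BUFFER_PADDING_SIZE bytes larger, leave the padding zeroed, and point packet.data at the copy. |CopyWithPadding| is a hypothetical helper:

#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<uint8_t> CopyWithPadding(const uint8_t* data, size_t size) {
  // The extra bytes stay zero-initialized, as FFmpeg's documentation requires.
  std::vector<uint8_t> padded(size + AV_INPUT_BUFFER_PADDING_SIZE);
  std::memcpy(padded.data(), data, size);
  return padded;
}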
342
343 bool H264DecoderImpl::IsInitialized() const {
344 return av_context_ != nullptr;
345 }
346
347 } // namespace webrtc
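For orientation, a hedged usage sketch of the decoder surface exercised above (InitDecode, RegisterDecodeCompleteCallback, Decode, Release). |PrintingCallback|, |DecodeOneFrame| and the 640x480 settings are hypothetical, and the exact DecodedImageCallback::Decoded signature should be checked against the WebRTC headers at this revision:

#include <cstring>

#include "webrtc/base/logging.h"
#include "webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h"

class PrintingCallback : public webrtc::DecodedImageCallback {
 public:
  int32_t Decoded(webrtc::VideoFrame& frame) override {
    LOG(LS_INFO) << "Decoded " << frame.width() << "x" << frame.height();
    return 0;
  }
};

void DecodeOneFrame(const webrtc::EncodedImage& encoded) {
  webrtc::H264DecoderImpl decoder;
  webrtc::VideoCodec settings;
  std::memset(&settings, 0, sizeof(settings));
  settings.codecType = webrtc::kVideoCodecH264;
  settings.width = 640;
  settings.height = 480;
  PrintingCallback callback;
  if (decoder.InitDecode(&settings, /*number_of_cores=*/1) !=
      WEBRTC_VIDEO_CODEC_OK) {
    return;
  }
  decoder.RegisterDecodeCompleteCallback(&callback);
  decoder.Decode(encoded, /*missing_frames=*/false,
                 /*fragmentation=*/nullptr, /*codec_specific_info=*/nullptr,
                 /*render_time_ms=*/0);
  decoder.Release();
}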