Chromium Code Reviews
| Index: webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc |
| diff --git a/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..892019c026e422e6ce87adc5bbe538c34561ec3f |
| --- /dev/null |
| +++ b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc |
| @@ -0,0 +1,347 @@ |
| +/* |
| + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + * |
| + */ |
| + |
| +#include "webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h" |
| + |
| +#include <algorithm> |
| + |
| +extern "C" { |
| +#include "third_party/ffmpeg/libavcodec/avcodec.h" |
| +#include "third_party/ffmpeg/libavformat/avformat.h" |
| +#include "third_party/ffmpeg/libavutil/imgutils.h" |
| +} // extern "C" |
| + |
| +#include "webrtc/base/checks.h" |
| +#include "webrtc/base/criticalsection.h" |
| +#include "webrtc/base/logging.h" |
| + |
| +namespace webrtc { |
| + |
| +namespace { |
| + |
| +static const AVPixelFormat kPixelFormat = AV_PIX_FMT_YUV420P; |
| +static const size_t kYPlaneIndex = 0; |
| +static const size_t kUPlaneIndex = 1; |
| +static const size_t kVPlaneIndex = 2; |
| + |
| +#if !defined(WEBRTC_CHROMIUM_BUILD) |
| + |
| +static bool ffmpeg_initialized = false; |
| + |
| +// Called by FFmpeg to do mutex operations if initialized via InitializeFFmpeg. |
|
palmer 2016/01/11 22:27:09:
Nit: use |...| to signal identifiers. For example,
hbos 2016/01/12 13:56:27:
Done.
|
| +static int LockManagerOperation(void** lock, AVLockOp op) |
|
palmer 2016/01/11 22:27:09:
You don't need to declare things in the anonymous
hbos 2016/01/12 13:56:27:
Done.
I think anonymous namespace is preferred to
|
| + EXCLUSIVE_LOCK_FUNCTION() UNLOCK_FUNCTION() { |
| + switch (op) { |
| + case AV_LOCK_CREATE: |
| + *lock = new rtc::CriticalSection(); |
| + return 0; |
| + case AV_LOCK_OBTAIN: |
| + static_cast<rtc::CriticalSection*>(*lock)->Enter(); |
| + return 0; |
| + case AV_LOCK_RELEASE: |
| + static_cast<rtc::CriticalSection*>(*lock)->Leave(); |
| + return 0; |
| + case AV_LOCK_DESTROY: |
| + delete static_cast<rtc::CriticalSection*>(*lock); |
| + *lock = nullptr; |
| + return 0; |
| + } |
| + return 1; |
|
palmer 2016/01/11 22:27:09:
Since the return values are just 0 and 1, should t
hbos 2016/01/12 13:56:26:
Return type: FFmpeg defines it to return int. C st
|
| +} |
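For reference on the return-type question above: FFmpeg's lock manager registration expects exactly this callback type, which is why the function returns int rather than bool (paraphrased from libavcodec/avcodec.h):

// The callback installed with av_lockmgr_register() must have this shape:
//   int (*cb)(void** mutex, enum AVLockOp op)
// i.e. the registration function is declared as:
//   int av_lockmgr_register(int (*cb)(void** mutex, enum AVLockOp op));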
| + |
| +// TODO(hbos): Assumed to be called on a single thread. Should DCHECK that |
|
palmer 2016/01/11 22:27:09:
Link to a bug in all TODOs.
hbos 2016/01/12 13:56:26:
Done.
|
| +// InitializeFFmpeg is only called on one thread or make it thread safe. |
| +static bool InitializeFFmpeg() { |
| + if (!ffmpeg_initialized) { |
| + if (av_lockmgr_register(LockManagerOperation) < 0) { |
| + LOG(LS_ERROR) << "av_lockmgr_register failed."; |
|
palmer 2016/01/11 22:27:09:
Should this ever happen? Should it be CHECK or NOT
hbos 2016/01/12 13:56:27:
Done.
|
| + return false; |
| + } |
| + av_register_all(); |
| + ffmpeg_initialized = true; |
| + } |
| + return true; |
| +} |
| + |
| +#endif // !defined(WEBRTC_CHROMIUM_BUILD) |
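A minimal sketch (not part of this CL) of how the thread-safety TODO above could be addressed, assuming <mutex> is available; the helper name is hypothetical and it would live inside the same !defined(WEBRTC_CHROMIUM_BUILD) block:

#include <mutex>  // std::call_once, std::once_flag

// Hypothetical thread-safe variant of InitializeFFmpeg(). std::call_once
// guarantees the lock manager registration and av_register_all() run exactly
// once even with concurrent callers.
static bool InitializeFFmpegThreadSafe() {
  static std::once_flag init_once;
  static bool init_succeeded = false;
  std::call_once(init_once, [] {
    if (av_lockmgr_register(LockManagerOperation) < 0) {
      LOG(LS_ERROR) << "av_lockmgr_register failed.";
      return;  // |init_succeeded| stays false.
    }
    av_register_all();
    init_succeeded = true;
  });
  return init_succeeded;
}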
| + |
| +static int NumberOfThreads(int width, int height, int number_of_cores) { |
|
mflodman 2016/01/12 10:31:25:
See my comment below about threads.
hbos 2016/01/12 13:56:26:
Acknowledged.
|
| + if (width * height >= 1920 * 1080 && number_of_cores > 8) { |
| + return 8; // 8 threads for 1080p on high perf machines. |
| + } else if (width * height > 1280 * 960 && number_of_cores >= 6) { |
| + return 3; // 3 threads for 1080p. |
| + } else if (width * height > 640 * 480 && number_of_cores >= 3) { |
| + return 2; // 2 threads for qHD/HD. |
| + } else { |
| + return 1; // 1 thread for VGA or less. |
| + } |
| +} |
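Per the thread-count discussion further down ("OK. 1 it is."), the follow-up presumably collapses this to a single decoding thread; a trivial sketch of that variant (the code that eventually lands may differ):

static int NumberOfThreads(int /*width*/, int /*height*/,
                           int /*number_of_cores*/) {
  return 1;  // Single-threaded decoding, as agreed in review.
}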
| + |
| +// Called by FFmpeg when it is done with a frame buffer, see AVGetBuffer2. |
| +static void AVFreeBuffer2(void* opaque, uint8_t* data) { |
| + VideoFrame* video_frame = static_cast<VideoFrame*>(opaque); |
|
palmer 2016/01/11 22:27:09:
This looks dangerous. What guarantee is there that
hbos 2016/01/12 13:56:27:
The only "guarantee" is that for each VideoFrame w
|
| + delete video_frame; |
| +} |
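For context on the review question above: |opaque| is whatever pointer was attached to the buffer when it was created, so the cast is only as safe as that pairing. Condensed from AVGetBuffer2 below and Decode() further down in this file:

// In AVGetBuffer2: the owning VideoFrame is attached as |opaque|.
av_frame->buf[0] = av_buffer_create(av_frame->data[kYPlaneIndex], total_size,
                                    AVFreeBuffer2,
                                    static_cast<void*>(video_frame), 0);
// In Decode(): the same pointer is read back before the frame is delivered.
VideoFrame* video_frame = static_cast<VideoFrame*>(
    av_buffer_get_opaque(av_frame_->buf[0]));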
| + |
| +// Called by FFmpeg when it needs a frame buffer to store decoded frames in. |
| +// The VideoFrames returned by FFmpeg at Decode originate from here. They are |
| +// reference counted and freed by FFmpeg using AVFreeBuffer2. |
| +// TODO(hbos): Use a frame pool for better performance instead of create/free. |
| +// Could be owned by decoder, static_cast<H264DecoderImpl*>(context->opaque). |
| +static int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) { |
| + RTC_CHECK_EQ(context->pix_fmt, kPixelFormat); // Same as in InitDecode. |
| + // width/height and coded_width/coded_height can be different due to cropping |
| + // or |lowres|. |
| + int width = std::max(context->width, context->coded_width); |
| + int height = std::max(context->height, context->coded_height); |
| + // See |lowres|, if used the decoder scales the image by 1/2^(lowres). This |
| + // has implications on which resolutions are valid, but we don't use it. |
| + RTC_CHECK_EQ(context->lowres, 0); |
| + |
| + RTC_CHECK_GE(width, 0); |
| + RTC_CHECK_GE(height, 0); |
| + int ret = av_image_check_size(width, height, 0, nullptr); |
| + if (ret < 0) { |
| + LOG(LS_ERROR) << "Invalid picture size " << width << "x" << height; |
| + return ret; |
| + } |
| + |
| + // The video frame is stored in |video_frame|. |av_frame| is FFmpeg's version |
| + // of a video frame and will be set up to reference |video_frame|'s buffers. |
| + VideoFrame* video_frame = new VideoFrame(); |
| + int stride_y = width; |
| + int stride_uv = (width + 1) / 2; |
| + RTC_CHECK_EQ(0, video_frame->CreateEmptyFrame( |
| + width, height, stride_y, stride_uv, stride_uv)); |
| + size_t total_size = video_frame->allocated_size(kYPlane) + |
|
palmer 2016/01/11 22:27:09:
Could this arithmetic overflow?
hbos 2016/01/12 13:56:26:
No, av_image_check_size (called above) makes sure
|
| + video_frame->allocated_size(kUPlane) + |
| + video_frame->allocated_size(kVPlane); |
| + RTC_DCHECK_EQ(total_size, static_cast<size_t>(stride_y * height + |
|
palmer 2016/01/11 22:27:09:
And this arithmetic.
Also, casting the result to
hbos 2016/01/12 13:56:26:
See previous comment. Switched to int instead of s
|
| + (stride_uv + stride_uv) * ((height + 1) / 2))); |
| + // FFmpeg note: "Each data plane must be aligned to the maximum required by |
| + // the target CPU." See get_buffer2. |
| + // TODO(hbos): Memory alignment on a per-plane basis. CreateEmptyFrame only |
| + // guarantees that the buffer of all planes is memory aligned, not each |
| + // individual plane. Or does "data plane" here refer to one data[] entry or |
| + // one allocation? |
| + |
| + // FFmpeg expects the initial allocation to be zero-initialized according to |
| + // http://crbug.com/390941. |
| + // A single |av_frame->buf| is used: the YUV data must be a contiguous blob of |
| + // memory, so we can zero-initialize all planes with one memset operation. |
| + RTC_DCHECK_EQ(video_frame->buffer(kUPlane), |
| + video_frame->buffer(kYPlane) + video_frame->allocated_size(kYPlane)); |
| + RTC_DCHECK_EQ(video_frame->buffer(kVPlane), |
| + video_frame->buffer(kUPlane) + video_frame->allocated_size(kUPlane)); |
| + memset(video_frame->buffer(kYPlane), 0, total_size); |
| + |
| + RTC_DCHECK_EQ(av_frame->width, width); |
| + RTC_DCHECK_EQ(av_frame->height, height); |
| + av_frame->format = context->pix_fmt; |
| + av_frame->reordered_opaque = context->reordered_opaque; |
| + |
| + // Set |av_frame| members as required by FFmpeg. |
| + av_frame->data[kYPlaneIndex] = video_frame->buffer(kYPlane); |
| + av_frame->linesize[kYPlaneIndex] = video_frame->stride(kYPlane); |
| + av_frame->data[kUPlaneIndex] = video_frame->buffer(kUPlane); |
| + av_frame->linesize[kUPlaneIndex] = video_frame->stride(kUPlane); |
| + av_frame->data[kVPlaneIndex] = video_frame->buffer(kVPlane); |
| + av_frame->linesize[kVPlaneIndex] = video_frame->stride(kVPlane); |
| + RTC_DCHECK_EQ(av_frame->extended_data, av_frame->data); |
| + |
| + av_frame->buf[0] = av_buffer_create(av_frame->data[kYPlaneIndex], |
| + total_size, |
| + AVFreeBuffer2, |
| + static_cast<void*>(video_frame), |
| + 0); |
| + RTC_CHECK(av_frame->buf[0]); |
| + return 0; |
| +} |
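To make the size bookkeeping above concrete, a worked example with illustrative values (not taken from the CL):

// For a 640x480 frame:
//   stride_y   = 640
//   stride_uv  = (640 + 1) / 2 = 320
//   total_size = 640 * 480                   // Y plane
//              + 2 * 320 * ((480 + 1) / 2)   // U + V planes
//              = 307200 + 153600 = 460800 bytes
// which matches the single contiguous allocation wrapped by |av_frame->buf[0]|.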
| + |
| +} // namespace |
| + |
| +H264DecoderImpl::H264DecoderImpl() |
| + : decoded_image_callback_(nullptr) { |
| +} |
| + |
| +H264DecoderImpl::~H264DecoderImpl() { |
| + Release(); |
| +} |
| + |
| +int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings, |
| + int32_t number_of_cores) { |
| + if (codec_settings && |
| + codec_settings->codecType != kVideoCodecH264) { |
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; |
| + } |
| + |
| + // In Chromium FFmpeg will be initialized outside of WebRTC and we should not |
| + // attempt to do so ourselves or it will be initialized twice. |
| + // TODO(hbos): Put behind a different flag in case a non-Chromium project wants |
| + // to initialize externally. |
| +#if !defined(WEBRTC_CHROMIUM_BUILD) |
| + // Make sure FFmpeg has been initialized. |
| + InitializeFFmpeg(); |
| +#endif |
| + |
| + // Release necessary in case of re-initializing. |
| + int32_t ret = Release(); |
| + if (ret != WEBRTC_VIDEO_CODEC_OK) |
| + return ret; |
| + RTC_DCHECK(!av_context_); |
| + |
| + // Initialize AVCodecContext. |
| + av_context_.reset(avcodec_alloc_context3(nullptr)); |
| + |
| + av_context_->codec_type = AVMEDIA_TYPE_VIDEO; |
| + av_context_->codec_id = AV_CODEC_ID_H264; |
| + if (codec_settings) { |
| + av_context_->coded_width = codec_settings->width; |
| + av_context_->coded_height = codec_settings->height; |
| + } |
| + av_context_->pix_fmt = kPixelFormat; |
| + av_context_->extradata = nullptr; |
| + av_context_->extradata_size = 0; |
| + |
| + av_context_->thread_count = NumberOfThreads(av_context_->coded_width, |
|
mflodman 2016/01/12 10:31:24:
This seems like a lot of threads for decoding, for
hbos 2016/01/12 13:56:26:
OK. 1 it is.
|
| + av_context_->coded_height, |
| + number_of_cores); |
| + av_context_->thread_type = FF_THREAD_SLICE; |
| + |
| + // FFmpeg will get video buffers from our AVGetBuffer2, memory managed by us. |
| + av_context_->get_buffer2 = AVGetBuffer2; |
| + // get_buffer2 is called with the context; its |opaque| field can be used to |
| + // obtain a pointer to |this|. |
| + av_context_->opaque = this; |
| + // Use ref counted frames (av_frame_unref). |
| + av_context_->refcounted_frames = 1; // true |
| + |
| + AVCodec* codec = avcodec_find_decoder(av_context_->codec_id); |
| + if (!codec) { |
| + // This is an indication that FFmpeg has not been initialized or it has not |
| + // been compiled/initialized with the correct set of codecs. |
| + LOG(LS_ERROR) << "FFmpeg H.264 decoder not found."; |
| + Release(); |
| + return WEBRTC_VIDEO_CODEC_ERROR; |
| + } |
| + int res = avcodec_open2(av_context_.get(), codec, nullptr); |
| + if (res < 0) { |
| + LOG(LS_ERROR) << "avcodec_open2 error: " << res; |
| + Release(); |
| + return WEBRTC_VIDEO_CODEC_ERROR; |
| + } |
| + |
| + av_frame_.reset(av_frame_alloc()); |
| + return WEBRTC_VIDEO_CODEC_OK; |
| +} |
| + |
| +int32_t H264DecoderImpl::Release() { |
| + av_context_.reset(); |
| + av_frame_.reset(); |
| + return WEBRTC_VIDEO_CODEC_OK; |
| +} |
| + |
| +int32_t H264DecoderImpl::Reset() { |
| + if (!IsInitialized()) |
| + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; |
| + InitDecode(nullptr, 1); |
| + return WEBRTC_VIDEO_CODEC_OK; |
| +} |
| + |
| +int32_t H264DecoderImpl::RegisterDecodeCompleteCallback( |
| + DecodedImageCallback* callback) { |
| + decoded_image_callback_ = callback; |
| + return WEBRTC_VIDEO_CODEC_OK; |
| +} |
| + |
| +int32_t H264DecoderImpl::Decode(const EncodedImage& input_image, |
| + bool /*missing_frames*/, |
| + const RTPFragmentationHeader* /*fragmentation*/, |
| + const CodecSpecificInfo* codec_specific_info, |
| + int64_t /*render_time_ms*/) { |
| + if (!IsInitialized()) |
| + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; |
| + if (!decoded_image_callback_) { |
| + LOG(LS_WARNING) << "InitDecode() has been called, but a callback function " |
| + "has not been set with RegisterDecodeCompleteCallback()"; |
| + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; |
| + } |
| + if (!input_image._buffer || !input_image._length) |
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; |
| + if (codec_specific_info && |
| + codec_specific_info->codecType != kVideoCodecH264) { |
| + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; |
| + } |
| + |
| + AVPacket packet; |
| + av_init_packet(&packet); |
| + // TODO(hbos): "The input buffer must be AV_INPUT_BUFFER_PADDING_SIZE larger |
| + // than the actual read bytes because some optimized bitstream readers read 32 |
| + // or 64 bits at once and could read over the end." See avcodec_decode_video2. |
| + // - Is this an issue? Do we have to make sure EncodedImage is allocated with |
|
palmer 2016/01/11 22:27:09:
Yes, this is an issue. Round the allocation size u
hbos 2016/01/12 13:56:27:
Will address in follow-up CL. There is a bug creat
|
| + // additional bytes or do we have to do an otherwise unnecessary copy? Might |
| + // only be a problem with non-mul-32 frame widths? |
| + // ("If the first 23 bits of the additional bytes are not 0, then damaged MPEG |
| + // bitstreams could cause overread and segfault.") |
| + // See issue: https://bugs.chromium.org/p/webrtc/issues/detail?id=5424 |
| + packet.data = input_image._buffer; |
| + packet.size = input_image._length; |
| + av_context_->reordered_opaque = input_image.ntp_time_ms_ * 1000; // ms -> μs |
| + |
| + int frame_decoded = 0; |
| + int result = avcodec_decode_video2(av_context_.get(), |
| + av_frame_.get(), |
| + &frame_decoded, |
| + &packet); |
| + if (result < 0) { |
| + LOG(LS_ERROR) << "avcodec_decode_video2 error: " << result; |
| + return WEBRTC_VIDEO_CODEC_ERROR; |
| + } |
| + // |result| is the number of bytes used, which should be all of them. |
| + if (result != packet.size) { |
| + LOG(LS_ERROR) << "avcodec_decode_video2 consumed " << result << " bytes " |
| + "when " << packet.size << " bytes were expected."; |
| + return WEBRTC_VIDEO_CODEC_ERROR; |
| + } |
| + |
| + if (!frame_decoded) { |
| + LOG(LS_WARNING) << "avcodec_decode_video2 successful but no frame was " |
| + "decoded."; |
| + return WEBRTC_VIDEO_CODEC_OK; |
| + } |
| + |
| + // Obtain the |video_frame| containing the decoded image. |
| + VideoFrame* video_frame = static_cast<VideoFrame*>( |
| + av_buffer_get_opaque(av_frame_->buf[0])); |
| + RTC_DCHECK(video_frame); |
| + RTC_CHECK_EQ(av_frame_->data[kYPlane], video_frame->buffer(kYPlane)); |
| + RTC_CHECK_EQ(av_frame_->data[kUPlane], video_frame->buffer(kUPlane)); |
| + RTC_CHECK_EQ(av_frame_->data[kVPlane], video_frame->buffer(kVPlane)); |
| + video_frame->set_timestamp(input_image._timeStamp); |
| + |
| + // Return decoded frame. |
| + int32_t ret = decoded_image_callback_->Decoded(*video_frame); |
| + // Stop referencing it, possibly freeing |video_frame|. |
| + av_frame_unref(av_frame_.get()); |
| + video_frame = nullptr; |
| + |
| + if (ret) { |
| + LOG(LS_WARNING) << "DecodedImageCallback::Decoded returned " << ret; |
| + return ret; |
| + } |
| + return WEBRTC_VIDEO_CODEC_OK; |
| +} |
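Regarding the padding TODO inside Decode() above, a minimal sketch of the copy-based fix discussed in review; this is not the landed change, the buffer name is illustrative, and it assumes <memory> and <cstring> are included (older FFmpeg spells the constant FF_INPUT_BUFFER_PADDING_SIZE):

// Copy the encoded data into a buffer with zeroed trailing padding so that
// optimized bitstream readers cannot read past the end of the input.
std::unique_ptr<uint8_t[]> padded_buffer(
    new uint8_t[input_image._length + AV_INPUT_BUFFER_PADDING_SIZE]);
memcpy(padded_buffer.get(), input_image._buffer, input_image._length);
memset(padded_buffer.get() + input_image._length, 0,
       AV_INPUT_BUFFER_PADDING_SIZE);
packet.data = padded_buffer.get();
packet.size = input_image._length;  // Padding is not counted in the size.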
| + |
| +bool H264DecoderImpl::IsInitialized() const { |
| + return av_context_ != nullptr; |
| +} |
| + |
| +} // namespace webrtc |
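For context, a hypothetical caller-side sketch of the decoder through the interface implemented above; |callback| and |encoded_image| are placeholders and return codes are ignored for brevity:

webrtc::H264DecoderImpl decoder;
webrtc::VideoCodec settings = {};
settings.codecType = webrtc::kVideoCodecH264;
settings.width = 640;
settings.height = 480;
decoder.InitDecode(&settings, /*number_of_cores=*/1);
decoder.RegisterDecodeCompleteCallback(&callback);  // A DecodedImageCallback*.
decoder.Decode(encoded_image, /*missing_frames=*/false, nullptr, nullptr, 0);
decoder.Release();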