OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 * | |
10 */ | |
11 | |
12 #include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h" | |
13 | |
14 #if defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED) | |
15 | |
16 #include <string> | |
17 #include <vector> | |
18 | |
19 #include "libyuv/convert_from.h" | |
20 #include "webrtc/base/checks.h" | |
21 #include "webrtc/base/logging.h" | |
22 #include "webrtc/base/scoped_ptr.h" | |
23 #include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h" | |
24 | |
25 namespace internal { | |
26 | |
27 // Convenience function for creating a dictionary. | |
28 inline CFDictionaryRef CreateCFDictionary(CFTypeRef* keys, | |
29 CFTypeRef* values, | |
30 size_t size) { | |
31 return CFDictionaryCreate(kCFAllocatorDefault, keys, values, size, | |
32 &kCFTypeDictionaryKeyCallBacks, | |
33 &kCFTypeDictionaryValueCallBacks); | |
34 } | |
35 | |
36 // Copies characters from a CFStringRef into a std::string. | |
37 std::string CFStringToString(const CFStringRef cf_string) { | |
38 DCHECK(cf_string); | |
39 std::string std_string; | |
40 // Get the size needed for UTF8 plus terminating character. | |
41 size_t buffer_size = | |
42 CFStringGetMaximumSizeForEncoding(CFStringGetLength(cf_string), | |
43 kCFStringEncodingUTF8) + | |
44 1; | |
45 rtc::scoped_ptr<char[]> buffer(new char[buffer_size]); | |
46 if (CFStringGetCString(cf_string, buffer.get(), buffer_size, | |
47 kCFStringEncodingUTF8)) { | |
48 // Copy over the characters. | |
49 std_string.assign(buffer.get()); | |
50 } | |
51 return std_string; | |
52 } | |
53 | |
54 // Convenience function for setting a VT property. | |
55 void SetVTSessionProperty(VTSessionRef session, | |
56 CFStringRef key, | |
57 int32_t value) { | |
58 CFNumberRef cfNum = | |
59 CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &value); | |
60 OSStatus status = VTSessionSetProperty(session, key, cfNum); | |
61 CFRelease(cfNum); | |
62 if (status != noErr) { | |
63 std::string key_string = CFStringToString(key); | |
64 LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string | |
65 << " to " << value << ": " << status; | |
66 } | |
67 } | |
68 | |
69 // Convenience function for setting a VT property. | |
70 void SetVTSessionProperty(VTSessionRef session, CFStringRef key, bool value) { | |
71 CFBooleanRef cf_bool = (value) ? kCFBooleanTrue : kCFBooleanFalse; | |
72 OSStatus status = VTSessionSetProperty(session, key, cf_bool); | |
73 if (status != noErr) { | |
74 std::string key_string = CFStringToString(key); | |
75 LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string | |
76 << " to " << value << ": " << status; | |
77 } | |
78 } | |
79 | |
80 // Convenience function for setting a VT property. | |
81 void SetVTSessionProperty(VTSessionRef session, | |
82 CFStringRef key, | |
83 CFStringRef value) { | |
84 OSStatus status = VTSessionSetProperty(session, key, value); | |
85 if (status != noErr) { | |
86 std::string key_string = CFStringToString(key); | |
87 std::string val_string = CFStringToString(value); | |
88 LOG(LS_ERROR) << "VTSessionSetProperty failed to set: " << key_string | |
89 << " to " << val_string << ": " << status; | |
90 } | |
91 } | |
92 | |
// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct FrameEncodeParams {
  // |cb| receives the encoded image (not owned). |csi| may be null; in that
  // case a default CodecSpecificInfo tagged as H264 is used. |w|/|h| are the
  // encoded dimensions, |rtms| the render time in milliseconds and |ts| the
  // RTP timestamp of the frame.
  FrameEncodeParams(webrtc::EncodedImageCallback* cb,
                    const webrtc::CodecSpecificInfo* csi,
                    int32_t w,
                    int32_t h,
                    int64_t rtms,
                    uint32_t ts)
      : callback(cb),
        width(w),
        height(h),
        render_time_ms(rtms),
        timestamp(ts) {
    if (csi) {
      codec_specific_info = *csi;
    } else {
      // No info supplied; tag the codec type so the output callback can still
      // report the payload as H264.
      codec_specific_info.codecType = webrtc::kVideoCodecH264;
    }
  }
  webrtc::EncodedImageCallback* callback;  // Not owned.
  webrtc::CodecSpecificInfo codec_specific_info;
  int32_t width;
  int32_t height;
  int64_t render_time_ms;
  uint32_t timestamp;
};
120 | |
121 // We receive I420Frames as input, but we need to feed CVPixelBuffers into the | |
122 // encoder. This performs the copy and format conversion. | |
123 // TODO(tkchin): See if encoder will accept i420 frames and compare performance. | |
124 bool CopyVideoFrameToPixelBuffer(const webrtc::VideoFrame& frame, | |
125 CVPixelBufferRef pixel_buffer) { | |
126 DCHECK(pixel_buffer); | |
127 DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer) == | |
128 kCVPixelFormatType_420YpCbCr8BiPlanarFullRange); | |
129 DCHECK(CVPixelBufferGetHeightOfPlane(pixel_buffer, 0) == | |
130 static_cast<size_t>(frame.height())); | |
131 DCHECK(CVPixelBufferGetWidthOfPlane(pixel_buffer, 0) == | |
132 static_cast<size_t>(frame.width())); | |
133 | |
134 CVReturn cvRet = CVPixelBufferLockBaseAddress(pixel_buffer, 0); | |
135 if (cvRet != kCVReturnSuccess) { | |
136 LOG(LS_ERROR) << "Failed to lock base address: " << cvRet; | |
137 return false; | |
138 } | |
139 uint8* dst_y = reinterpret_cast<uint8*>( | |
140 CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 0)); | |
141 int dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 0); | |
142 uint8* dst_uv = reinterpret_cast<uint8*>( | |
143 CVPixelBufferGetBaseAddressOfPlane(pixel_buffer, 1)); | |
144 int dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer, 1); | |
145 // Convert I420 to NV12. | |
146 int ret = libyuv::I420ToNV12( | |
147 frame.buffer(webrtc::kYPlane), frame.stride(webrtc::kYPlane), | |
148 frame.buffer(webrtc::kUPlane), frame.stride(webrtc::kUPlane), | |
149 frame.buffer(webrtc::kVPlane), frame.stride(webrtc::kVPlane), | |
150 dst_y, dst_stride_y, dst_uv, dst_stride_uv, | |
151 frame.width(), frame.height()); | |
152 CVPixelBufferUnlockBaseAddress(pixel_buffer, 0); | |
153 if (ret) { | |
154 LOG(LS_ERROR) << "Error converting I420 VideoFrame to NV12 :" << ret; | |
155 return false; | |
156 } | |
157 return true; | |
158 } | |
159 | |
160 // This is the callback function that VideoToolbox calls when encode is | |
161 // complete. | |
162 void VTCompressionOutputCallback(void* encoder, | |
163 void* params, | |
164 OSStatus status, | |
165 VTEncodeInfoFlags info_flags, | |
166 CMSampleBufferRef sample_buffer) { | |
167 rtc::scoped_ptr<FrameEncodeParams> encode_params( | |
168 reinterpret_cast<FrameEncodeParams*>(params)); | |
169 if (status != noErr) { | |
170 LOG(LS_ERROR) << "H264 encoding failed."; | |
171 return; | |
172 } | |
173 if (info_flags & kVTEncodeInfo_FrameDropped) { | |
174 LOG(LS_INFO) << "H264 encode dropped frame."; | |
175 } | |
176 | |
177 bool is_keyframe = false; | |
178 CFArrayRef attachments = | |
179 CMSampleBufferGetSampleAttachmentsArray(sample_buffer, 0); | |
180 if (attachments != nullptr && CFArrayGetCount(attachments)) { | |
181 CFDictionaryRef attachment = | |
182 static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0)); | |
183 is_keyframe = | |
184 !CFDictionaryContainsKey(attachment, kCMSampleAttachmentKey_NotSync); | |
185 } | |
186 | |
187 // Convert the sample buffer into a buffer suitable for RTP packetization. | |
188 // TODO(tkchin): Allocate buffers through a pool. | |
189 rtc::scoped_ptr<rtc::Buffer> buffer(new rtc::Buffer()); | |
190 rtc::scoped_ptr<webrtc::RTPFragmentationHeader> header; | |
191 if (!H264CMSampleBufferToAnnexBBuffer(sample_buffer, | |
192 is_keyframe, | |
193 buffer.get(), | |
194 header.accept())) { | |
195 return; | |
196 } | |
197 webrtc::EncodedImage frame(buffer->data(), buffer->size(), buffer->size()); | |
198 frame._encodedWidth = encode_params->width; | |
199 frame._encodedHeight = encode_params->height; | |
200 frame._completeFrame = true; | |
201 frame._frameType = is_keyframe ? webrtc::kKeyFrame : webrtc::kDeltaFrame; | |
202 frame.capture_time_ms_ = encode_params->render_time_ms; | |
203 frame._timeStamp = encode_params->timestamp; | |
204 | |
205 int result = encode_params->callback->Encoded( | |
206 frame, &(encode_params->codec_specific_info), header.get()); | |
207 if (result != 0) { | |
208 LOG(LS_ERROR) << "Encoded callback failed: " << result; | |
209 } | |
210 } | |
211 | |
212 } // namespace internal | |
213 | |
214 namespace webrtc { | |
215 | |
// Starts out uninitialized; InitEncode() must be called before Encode().
H264VideoToolboxEncoder::H264VideoToolboxEncoder()
    : callback_(nullptr), compression_session_(nullptr) {
}
219 | |
// Tears down the VideoToolbox session (if any) so no further encode
// callbacks can fire after destruction.
H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
  DestroyCompressionSession();
}
223 | |
int H264VideoToolboxEncoder::InitEncode(const VideoCodec* codec_settings,
                                        int number_of_cores,
                                        size_t max_payload_size) {
  DCHECK(codec_settings);
  DCHECK_EQ(codec_settings->codecType, kVideoCodecH264);
  // |number_of_cores| and |max_payload_size| are currently unused; the HW
  // encoder manages its own threading and slice sizes.
  // TODO(tkchin): We may need to enforce width/height dimension restrictions
  // to match what the encoder supports.
  width_ = codec_settings->width;
  height_ = codec_settings->height;
  // We can only set average bitrate on the HW encoder.
  bitrate_ = codec_settings->startBitrate * 1000;  // startBitrate is in kbps.

  // TODO(tkchin): Try setting payload size via
  // kVTCompressionPropertyKey_MaxH264SliceBytes.

  return ResetCompressionSession();
}
241 | |
242 int H264VideoToolboxEncoder::Encode( | |
243 const VideoFrame& input_image, | |
244 const CodecSpecificInfo* codec_specific_info, | |
245 const std::vector<VideoFrameType>* frame_types) { | |
246 if (input_image.IsZeroSize()) { | |
247 // It's possible to get zero sizes as a signal to produce keyframes (this | |
248 // happens for internal sources). But this shouldn't happen in | |
249 // webrtcvideoengine2. | |
250 RTC_NOTREACHED(); | |
251 return WEBRTC_VIDEO_CODEC_OK; | |
252 } | |
253 if (!callback_ || !compression_session_) { | |
254 return WEBRTC_VIDEO_CODEC_UNINITIALIZED; | |
255 } | |
256 | |
257 // Get a pixel buffer from the pool and copy frame data over. | |
258 CVPixelBufferPoolRef pixel_buffer_pool = | |
259 VTCompressionSessionGetPixelBufferPool(compression_session_); | |
260 CVPixelBufferRef pixel_buffer = nullptr; | |
261 CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool, | |
262 &pixel_buffer); | |
263 if (ret != kCVReturnSuccess) { | |
264 LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret; | |
265 // We probably want to drop frames here, since failure probably means | |
266 // that the pool is empty. | |
267 return WEBRTC_VIDEO_CODEC_ERROR; | |
268 } | |
269 DCHECK(pixel_buffer); | |
270 if (!internal::CopyVideoFrameToPixelBuffer(input_image, pixel_buffer)) { | |
271 LOG(LS_ERROR) << "Failed to copy frame data."; | |
272 CVBufferRelease(pixel_buffer); | |
273 return WEBRTC_VIDEO_CODEC_ERROR; | |
274 } | |
275 | |
276 // Check if we need a keyframe. | |
277 bool is_keyframe_required = false; | |
278 if (frame_types) { | |
279 for (auto frame_type : *frame_types) { | |
280 if (frame_type == kKeyFrame) { | |
281 is_keyframe_required = true; | |
282 break; | |
283 } | |
284 } | |
285 } | |
286 | |
287 CMTime presentation_time_stamp = | |
288 CMTimeMake(input_image.render_time_ms(), 1000); | |
289 CFDictionaryRef frame_properties = nullptr; | |
290 if (is_keyframe_required) { | |
291 CFTypeRef keys[] = { kVTEncodeFrameOptionKey_ForceKeyFrame }; | |
292 CFTypeRef values[] = { kCFBooleanTrue }; | |
293 frame_properties = internal::CreateCFDictionary(keys, values, 1); | |
294 } | |
295 rtc::scoped_ptr<internal::FrameEncodeParams> encode_params; | |
296 encode_params.reset(new internal::FrameEncodeParams( | |
297 callback_, codec_specific_info, width_, height_, | |
298 input_image.render_time_ms(), input_image.timestamp())); | |
299 VTCompressionSessionEncodeFrame( | |
300 compression_session_, pixel_buffer, presentation_time_stamp, | |
301 kCMTimeInvalid, frame_properties, encode_params.release(), nullptr); | |
302 if (frame_properties) { | |
303 CFRelease(frame_properties); | |
304 } | |
305 if (pixel_buffer) { | |
306 CVBufferRelease(pixel_buffer); | |
307 } | |
308 return WEBRTC_VIDEO_CODEC_OK; | |
309 } | |
310 | |
// Stores the callback that will receive encoded frames. |callback| is not
// owned and must outlive this encoder (or be cleared via Release()).
int H264VideoToolboxEncoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}
316 | |
// No-op: VideoToolbox exposes no knobs that depend on loss or RTT.
int H264VideoToolboxEncoder::SetChannelParameters(uint32_t packet_loss,
                                                  int64_t rtt) {
  // Encoder doesn't know anything about packet loss or rtt so just return.
  return WEBRTC_VIDEO_CODEC_OK;
}
322 | |
323 int H264VideoToolboxEncoder::SetRates(uint32_t new_bitrate_kbit, | |
324 uint32_t frame_rate) { | |
325 bitrate_ = new_bitrate_kbit * 1000; | |
326 if (compression_session_) { | |
327 internal::SetVTSessionProperty(compression_session_, | |
328 kVTCompressionPropertyKey_AverageBitRate, | |
329 bitrate_); | |
330 } | |
331 return WEBRTC_VIDEO_CODEC_OK; | |
332 } | |
333 | |
int H264VideoToolboxEncoder::Release() {
  callback_ = nullptr;
  // Need to reset so that the session is invalidated and won't use the
  // callback anymore.
  return ResetCompressionSession();
}
340 | |
// Destroys any existing session and creates a fresh one configured for the
// current width_/height_/bitrate_. Returns WEBRTC_VIDEO_CODEC_OK on success.
int H264VideoToolboxEncoder::ResetCompressionSession() {
  DestroyCompressionSession();

  // Set source image buffer attributes. These attributes will be present on
  // buffers retrieved from the encoder's pixel buffer pool.
  const size_t attributes_size = 3;
  CFTypeRef keys[attributes_size] = {
#if defined(WEBRTC_IOS)
    kCVPixelBufferOpenGLESCompatibilityKey,
#elif defined(WEBRTC_MAC)
    kCVPixelBufferOpenGLCompatibilityKey,
#endif
    kCVPixelBufferIOSurfacePropertiesKey,
    kCVPixelBufferPixelFormatTypeKey
  };
  // An empty properties dictionary is enough to request IOSurface backing.
  CFDictionaryRef io_surface_value =
      internal::CreateCFDictionary(nullptr, nullptr, 0);
  int64_t nv12type = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
  CFNumberRef pixel_format =
      CFNumberCreate(nullptr, kCFNumberLongType, &nv12type);
  // |values| must stay parallel to |keys| above.
  CFTypeRef values[attributes_size] = {
    kCFBooleanTrue,
    io_surface_value,
    pixel_format
  };
  CFDictionaryRef source_attributes =
      internal::CreateCFDictionary(keys, values, attributes_size);
  // The dictionary retained the values; drop our local references.
  if (io_surface_value) {
    CFRelease(io_surface_value);
    io_surface_value = nullptr;
  }
  if (pixel_format) {
    CFRelease(pixel_format);
    pixel_format = nullptr;
  }
  OSStatus status = VTCompressionSessionCreate(
      nullptr,  // use default allocator
      width_,
      height_,
      kCMVideoCodecType_H264,
      nullptr,  // use default encoder
      source_attributes,
      nullptr,  // use default compressed data allocator
      internal::VTCompressionOutputCallback,
      this,
      &compression_session_);
  if (source_attributes) {
    CFRelease(source_attributes);
    source_attributes = nullptr;
  }
  if (status != noErr) {
    LOG(LS_ERROR) << "Failed to create compression session: " << status;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  ConfigureCompressionSession();
  return WEBRTC_VIDEO_CODEC_OK;
}
398 | |
399 void H264VideoToolboxEncoder::ConfigureCompressionSession() { | |
400 DCHECK(compression_session_); | |
401 internal::SetVTSessionProperty(compression_session_, | |
402 kVTCompressionPropertyKey_RealTime, true); | |
403 internal::SetVTSessionProperty(compression_session_, | |
404 kVTCompressionPropertyKey_ProfileLevel, | |
405 kVTProfileLevel_H264_Baseline_AutoLevel); | |
406 internal::SetVTSessionProperty( | |
407 compression_session_, kVTCompressionPropertyKey_AverageBitRate, bitrate_); | |
408 internal::SetVTSessionProperty(compression_session_, | |
409 kVTCompressionPropertyKey_AllowFrameReordering, | |
410 false); | |
411 // TODO(tkchin): Look at entropy mode and colorspace matrices. | |
412 // TODO(tkchin): Investigate to see if there's any way to make this work. | |
413 // May need it to interop with Android. Currently this call just fails. | |
414 // On inspecting encoder output on iOS8, this value is set to 6. | |
415 // internal::SetVTSessionProperty(compression_session_, | |
416 // kVTCompressionPropertyKey_MaxFrameDelayCount, | |
417 // 1); | |
418 // TODO(tkchin): See if enforcing keyframe frequency is beneficial in any | |
419 // way. | |
420 // internal::SetVTSessionProperty( | |
421 // compression_session_, | |
422 // kVTCompressionPropertyKey_MaxKeyFrameInterval, 240); | |
423 // internal::SetVTSessionProperty( | |
424 // compression_session_, | |
425 // kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration, 240); | |
stefan-webrtc
2015/06/26 07:19:41
Do you have any idea what the default is? Wonderin
tkchin_webrtc
2015/06/26 20:28:09
No, I wanted to do all that work as part of a sepa
| |
426 } | |
427 | |
428 void H264VideoToolboxEncoder::DestroyCompressionSession() { | |
429 if (compression_session_) { | |
430 VTCompressionSessionInvalidate(compression_session_); | |
431 CFRelease(compression_session_); | |
432 compression_session_ = nullptr; | |
433 } | |
434 } | |
435 | |
436 } // namespace webrtc | |
437 | |
438 #endif // defined(WEBRTC_VIDEO_TOOLBOX_SUPPORTED) | |
OLD | NEW |