webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc - Issue 1979443004: Add H264 bitstream rewriting to limit frame reordering marker in header

Side by Side Diff: webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc

Issue 1979443004: Add H264 bitstream rewriting to limit frame reordering marker in header (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Fixed compiler warning on win Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/rtp_rtcp/source/h264/sps_parser_unittest.cc ('K') | « webrtc/modules/rtp_rtcp/source/h264_sps_parser.h ('k') | webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc » ('j') | webrtc/modules/rtp_rtcp/source/rtp_format_h264.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 /*

2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.

3 *

4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.

9 */

10

#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"

#include <limits>

#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/logging.h"

16

// Evaluates |x| exactly once and makes the enclosing function return false
// when the result is falsy. The expansion is wrapped in do { } while (0) so
// that it behaves as a single statement: it must be followed by a semicolon
// and can be used safely as the body of an unbraced if/else.
#define RETURN_FALSE_ON_FAIL(x) \
  do {                          \
    if (!(x)) {                 \
      return false;             \
    }                           \
  } while (0)

21

22 namespace webrtc {

23

24 H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length)

25 : sps_(sps), byte_length_(byte_length), width_(), height_() {

26 }

27

28 bool H264SpsParser::Parse() {

29 // General note: this is based off the 02/2014 version of the H.264 standard.

30 // You can find it on this page:

31 // http://www.itu.int/rec/T-REC-H.264

32

33 const char* sps_bytes = reinterpret_cast<const char*>(sps_);

34 // First, parse out rbsp, which is basically the source buffer minus emulation

35 // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in

36 // section 7.3.1 of the H.264 standard.

37 rtc::ByteBufferWriter rbsp_buffer;

38 for (size_t i = 0; i < byte_length_;) {

39 // Be careful about over/underflow here. byte_length_ - 3 can underflow, and

40 // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_

41 // above, and that expression will produce the number of bytes left in

42 // the stream including the byte at i.

43 if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&

44 sps_[i + 2] == 3) {

45 // Two rbsp bytes + the emulation byte.

46 rbsp_buffer.WriteBytes(sps_bytes + i, 2);

47 i += 3;

48 } else {

49 // Single rbsp byte.

50 rbsp_buffer.WriteBytes(sps_bytes + i, 1);

51 i++;

52 }

53 }

54

55 // Now, we need to use a bit buffer to parse through the actual AVC SPS

56 // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the

57 // H.264 standard for a complete description.

58 // Since we only care about resolution, we ignore the majority of fields, but

59 // we still have to actively parse through a lot of the data, since many of

60 // the fields have variable size.

61 // We're particularly interested in:

62 // chroma_format_idc -> affects crop units

63 // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).

64 // frame_crop_*_offset -> crop information

65 rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()),

66 rbsp_buffer.Length());

67

68 // The golomb values we have to read, not just consume.

69 uint32_t golomb_ignored;

70

71 // separate_colour_plane_flag is optional (assumed 0), but has implications

72 // about the ChromaArrayType, which modifies how we treat crop coordinates.

73 uint32_t separate_colour_plane_flag = 0;

74 // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is

75 // 0. It defaults to 1, when not specified.

76 uint32_t chroma_format_idc = 1;

77

78 // profile_idc: u(8). We need it to determine if we need to read/skip chroma

79 // formats.

80 uint8_t profile_idc;

81 RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));

82 // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits

83 // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.

84 RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));

85 // level_idc: u(8)

86 RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));

87 // seq_parameter_set_id: ue(v)

88 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

89 // See if profile_idc has chroma format information.

90 if (profile_idc == 100 \|\| profile_idc == 110 \|\| profile_idc == 122 \|\|

91 profile_idc == 244 \|\| profile_idc == 44 \|\| profile_idc == 83 \|\|

92 profile_idc == 86 \|\| profile_idc == 118 \|\| profile_idc == 128 \|\|

93 profile_idc == 138 \|\| profile_idc == 139 \|\| profile_idc == 134) {

94 // chroma_format_idc: ue(v)

95 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));

96 if (chroma_format_idc == 3) {

97 // separate_colour_plane_flag: u(1)

98 RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));

99 }

100 // bit_depth_luma_minus8: ue(v)

101 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

102 // bit_depth_chroma_minus8: ue(v)

103 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

104 // qpprime_y_zero_transform_bypass_flag: u(1)

105 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));

106 // seq_scaling_matrix_present_flag: u(1)

107 uint32_t seq_scaling_matrix_present_flag;

108 RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));

109 if (seq_scaling_matrix_present_flag) {

110 // seq_scaling_list_present_flags. Either 8 or 12, depending on

111 // chroma_format_idc.

112 uint32_t seq_scaling_list_present_flags;

113 if (chroma_format_idc != 3) {

114 RETURN_FALSE_ON_FAIL(

115 parser.ReadBits(&seq_scaling_list_present_flags, 8));

116 } else {

117 RETURN_FALSE_ON_FAIL(

118 parser.ReadBits(&seq_scaling_list_present_flags, 12));

119 }

120 // We don't support reading the sequence scaling list, and we don't really

121 // see/use them in practice, so we'll just reject the full sps if we see

122 // any provided.

123 if (seq_scaling_list_present_flags > 0) {

124 LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";

125 return false;

126 }

127 }

128 }

129 // log2_max_frame_num_minus4: ue(v)

130 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

131 // pic_order_cnt_type: ue(v)

132 uint32_t pic_order_cnt_type;

133 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));

134 if (pic_order_cnt_type == 0) {

135 // log2_max_pic_order_cnt_lsb_minus4: ue(v)

136 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

137 } else if (pic_order_cnt_type == 1) {

138 // delta_pic_order_always_zero_flag: u(1)

139 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));

140 // offset_for_non_ref_pic: se(v)

141 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

142 // offset_for_top_to_bottom_field: se(v)

143 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

144 // num_ref_frames_in_pic_order_cnt_cycle: ue(v)

145 uint32_t num_ref_frames_in_pic_order_cnt_cycle;

146 RETURN_FALSE_ON_FAIL(

147 parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));

148 for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {

149 // offset_for_ref_frame[i]: se(v)

150 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

151 }

152 }

153 // max_num_ref_frames: ue(v)

154 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));

155 // gaps_in_frame_num_value_allowed_flag: u(1)

156 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));

157 //

158 // IMPORTANT ONES! Now we're getting to resolution. First we read the pic

159 // width/height in macroblocks (16x16), which gives us the base resolution,

160 // and then we continue on until we hit the frame crop offsets, which are used

161 // to signify resolutions that aren't multiples of 16.

162 //

163 // pic_width_in_mbs_minus1: ue(v)

164 uint32_t pic_width_in_mbs_minus1;

165 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));

166 // pic_height_in_map_units_minus1: ue(v)

167 uint32_t pic_height_in_map_units_minus1;

168 RETURN_FALSE_ON_FAIL(

169 parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));

170 // frame_mbs_only_flag: u(1)

171 uint32_t frame_mbs_only_flag;

172 RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));

173 if (!frame_mbs_only_flag) {

174 // mb_adaptive_frame_field_flag: u(1)

175 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));

176 }

177 // direct_8x8_inference_flag: u(1)

178 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));

179 //

180 // MORE IMPORTANT ONES! Now we're at the frame crop information.

181 //

182 // frame_cropping_flag: u(1)

183 uint32_t frame_cropping_flag;

184 uint32_t frame_crop_left_offset = 0;

185 uint32_t frame_crop_right_offset = 0;

186 uint32_t frame_crop_top_offset = 0;

187 uint32_t frame_crop_bottom_offset = 0;

188 RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));

189 if (frame_cropping_flag) {

190 // frame_crop_{left, right, top, bottom}_offset: ue(v)

191 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));

192 RETURN_FALSE_ON_FAIL(

193 parser.ReadExponentialGolomb(&frame_crop_right_offset));

194 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));

195 RETURN_FALSE_ON_FAIL(

196 parser.ReadExponentialGolomb(&frame_crop_bottom_offset));

197 }

198

199 // Far enough! We don't use the rest of the SPS.

200

201 // Start with the resolution determined by the pic_width/pic_height fields.

202 int width = 16 * (pic_width_in_mbs_minus1 + 1);

203 int height =

204 16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);

205

206 // Figure out the crop units in pixels. That's based on the chroma format's

207 // sampling, which is indicated by chroma_format_idc.

208 if (separate_colour_plane_flag \|\| chroma_format_idc == 0) {

209 frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);

210 frame_crop_top_offset *= (2 - frame_mbs_only_flag);

211 } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {

212 // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).

213 if (chroma_format_idc == 1 \|\| chroma_format_idc == 2) {

214 frame_crop_left_offset *= 2;

215 frame_crop_right_offset *= 2;

216 }

217 // Height multipliers for format 1 (4:2:0).

218 if (chroma_format_idc == 1) {

219 frame_crop_top_offset *= 2;

220 frame_crop_bottom_offset *= 2;

221 }

222 }

223 // Subtract the crop for each dimension.

224 width -= (frame_crop_left_offset + frame_crop_right_offset);

225 height -= (frame_crop_top_offset + frame_crop_bottom_offset);

226

227 width_ = width;

228 height_ = height;

229 return true;

230 }

231

232 } // namespace webrtc

OLD	NEW