OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"

#include <limits>

#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/logging.h"
16 | |
// Evaluates |x| once and makes the enclosing function return false when the
// result is falsy. The body is wrapped in do { } while (0) so that the
// expansion plus the caller's trailing semicolon forms exactly one statement;
// this keeps `if (c) RETURN_FALSE_ON_FAIL(x); else ...` well-formed and stops
// a following `else` from binding to the macro's internal `if`.
#define RETURN_FALSE_ON_FAIL(x) \
  do {                          \
    if (!(x)) {                 \
      return false;             \
    }                           \
  } while (0)
21 | |
22 namespace webrtc { | |
23 | |
24 H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length) | |
25 : sps_(sps), byte_length_(byte_length), width_(), height_() { | |
26 } | |
27 | |
28 bool H264SpsParser::Parse() { | |
29 // General note: this is based off the 02/2014 version of the H.264 standard. | |
30 // You can find it on this page: | |
31 // http://www.itu.int/rec/T-REC-H.264 | |
32 | |
33 const char* sps_bytes = reinterpret_cast<const char*>(sps_); | |
34 // First, parse out rbsp, which is basically the source buffer minus emulation | |
35 // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in | |
36 // section 7.3.1 of the H.264 standard. | |
37 rtc::ByteBufferWriter rbsp_buffer; | |
38 for (size_t i = 0; i < byte_length_;) { | |
39 // Be careful about over/underflow here. byte_length_ - 3 can underflow, and | |
40 // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ | |
41 // above, and that expression will produce the number of bytes left in | |
42 // the stream including the byte at i. | |
43 if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 && | |
44 sps_[i + 2] == 3) { | |
45 // Two rbsp bytes + the emulation byte. | |
46 rbsp_buffer.WriteBytes(sps_bytes + i, 2); | |
47 i += 3; | |
48 } else { | |
49 // Single rbsp byte. | |
50 rbsp_buffer.WriteBytes(sps_bytes + i, 1); | |
51 i++; | |
52 } | |
53 } | |
54 | |
55 // Now, we need to use a bit buffer to parse through the actual AVC SPS | |
56 // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the | |
57 // H.264 standard for a complete description. | |
58 // Since we only care about resolution, we ignore the majority of fields, but | |
59 // we still have to actively parse through a lot of the data, since many of | |
60 // the fields have variable size. | |
61 // We're particularly interested in: | |
62 // chroma_format_idc -> affects crop units | |
63 // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). | |
64 // frame_crop_*_offset -> crop information | |
65 rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()), | |
66 rbsp_buffer.Length()); | |
67 | |
68 // The golomb values we have to read, not just consume. | |
69 uint32_t golomb_ignored; | |
70 | |
71 // separate_colour_plane_flag is optional (assumed 0), but has implications | |
72 // about the ChromaArrayType, which modifies how we treat crop coordinates. | |
73 uint32_t separate_colour_plane_flag = 0; | |
74 // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is | |
75 // 0. It defaults to 1, when not specified. | |
76 uint32_t chroma_format_idc = 1; | |
77 | |
78 // profile_idc: u(8). We need it to determine if we need to read/skip chroma | |
79 // formats. | |
80 uint8_t profile_idc; | |
81 RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc)); | |
82 // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits | |
83 // 1 bit each for the flags + 2 bits = 8 bits = 1 byte. | |
84 RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); | |
85 // level_idc: u(8) | |
86 RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); | |
87 // seq_parameter_set_id: ue(v) | |
88 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
89 // See if profile_idc has chroma format information. | |
90 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || | |
91 profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || | |
92 profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || | |
93 profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { | |
94 // chroma_format_idc: ue(v) | |
95 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc)); | |
96 if (chroma_format_idc == 3) { | |
97 // separate_colour_plane_flag: u(1) | |
98 RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1)); | |
99 } | |
100 // bit_depth_luma_minus8: ue(v) | |
101 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
102 // bit_depth_chroma_minus8: ue(v) | |
103 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
104 // qpprime_y_zero_transform_bypass_flag: u(1) | |
105 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); | |
106 // seq_scaling_matrix_present_flag: u(1) | |
107 uint32_t seq_scaling_matrix_present_flag; | |
108 RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1)); | |
109 if (seq_scaling_matrix_present_flag) { | |
110 // seq_scaling_list_present_flags. Either 8 or 12, depending on | |
111 // chroma_format_idc. | |
112 uint32_t seq_scaling_list_present_flags; | |
113 if (chroma_format_idc != 3) { | |
114 RETURN_FALSE_ON_FAIL( | |
115 parser.ReadBits(&seq_scaling_list_present_flags, 8)); | |
116 } else { | |
117 RETURN_FALSE_ON_FAIL( | |
118 parser.ReadBits(&seq_scaling_list_present_flags, 12)); | |
119 } | |
120 // We don't support reading the sequence scaling list, and we don't really | |
121 // see/use them in practice, so we'll just reject the full sps if we see | |
122 // any provided. | |
123 if (seq_scaling_list_present_flags > 0) { | |
124 LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported."; | |
125 return false; | |
126 } | |
127 } | |
128 } | |
129 // log2_max_frame_num_minus4: ue(v) | |
130 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
131 // pic_order_cnt_type: ue(v) | |
132 uint32_t pic_order_cnt_type; | |
133 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type)); | |
134 if (pic_order_cnt_type == 0) { | |
135 // log2_max_pic_order_cnt_lsb_minus4: ue(v) | |
136 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
137 } else if (pic_order_cnt_type == 1) { | |
138 // delta_pic_order_always_zero_flag: u(1) | |
139 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); | |
140 // offset_for_non_ref_pic: se(v) | |
141 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
142 // offset_for_top_to_bottom_field: se(v) | |
143 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
144 // num_ref_frames_in_pic_order_cnt_cycle: ue(v) | |
145 uint32_t num_ref_frames_in_pic_order_cnt_cycle; | |
146 RETURN_FALSE_ON_FAIL( | |
147 parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); | |
148 for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { | |
149 // offset_for_ref_frame[i]: se(v) | |
150 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
151 } | |
152 } | |
153 // max_num_ref_frames: ue(v) | |
154 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); | |
155 // gaps_in_frame_num_value_allowed_flag: u(1) | |
156 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); | |
157 // | |
158 // IMPORTANT ONES! Now we're getting to resolution. First we read the pic | |
159 // width/height in macroblocks (16x16), which gives us the base resolution, | |
160 // and then we continue on until we hit the frame crop offsets, which are used | |
161 // to signify resolutions that aren't multiples of 16. | |
162 // | |
163 // pic_width_in_mbs_minus1: ue(v) | |
164 uint32_t pic_width_in_mbs_minus1; | |
165 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1)); | |
166 // pic_height_in_map_units_minus1: ue(v) | |
167 uint32_t pic_height_in_map_units_minus1; | |
168 RETURN_FALSE_ON_FAIL( | |
169 parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1)); | |
170 // frame_mbs_only_flag: u(1) | |
171 uint32_t frame_mbs_only_flag; | |
172 RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1)); | |
173 if (!frame_mbs_only_flag) { | |
174 // mb_adaptive_frame_field_flag: u(1) | |
175 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); | |
176 } | |
177 // direct_8x8_inference_flag: u(1) | |
178 RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); | |
179 // | |
180 // MORE IMPORTANT ONES! Now we're at the frame crop information. | |
181 // | |
182 // frame_cropping_flag: u(1) | |
183 uint32_t frame_cropping_flag; | |
184 uint32_t frame_crop_left_offset = 0; | |
185 uint32_t frame_crop_right_offset = 0; | |
186 uint32_t frame_crop_top_offset = 0; | |
187 uint32_t frame_crop_bottom_offset = 0; | |
188 RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1)); | |
189 if (frame_cropping_flag) { | |
190 // frame_crop_{left, right, top, bottom}_offset: ue(v) | |
191 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset)); | |
192 RETURN_FALSE_ON_FAIL( | |
193 parser.ReadExponentialGolomb(&frame_crop_right_offset)); | |
194 RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset)); | |
195 RETURN_FALSE_ON_FAIL( | |
196 parser.ReadExponentialGolomb(&frame_crop_bottom_offset)); | |
197 } | |
198 | |
199 // Far enough! We don't use the rest of the SPS. | |
200 | |
201 // Start with the resolution determined by the pic_width/pic_height fields. | |
202 int width = 16 * (pic_width_in_mbs_minus1 + 1); | |
203 int height = | |
204 16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); | |
205 | |
206 // Figure out the crop units in pixels. That's based on the chroma format's | |
207 // sampling, which is indicated by chroma_format_idc. | |
208 if (separate_colour_plane_flag || chroma_format_idc == 0) { | |
209 frame_crop_bottom_offset *= (2 - frame_mbs_only_flag); | |
210 frame_crop_top_offset *= (2 - frame_mbs_only_flag); | |
211 } else if (!separate_colour_plane_flag && chroma_format_idc > 0) { | |
212 // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). | |
213 if (chroma_format_idc == 1 || chroma_format_idc == 2) { | |
214 frame_crop_left_offset *= 2; | |
215 frame_crop_right_offset *= 2; | |
216 } | |
217 // Height multipliers for format 1 (4:2:0). | |
218 if (chroma_format_idc == 1) { | |
219 frame_crop_top_offset *= 2; | |
220 frame_crop_bottom_offset *= 2; | |
221 } | |
222 } | |
223 // Subtract the crop for each dimension. | |
224 width -= (frame_crop_left_offset + frame_crop_right_offset); | |
225 height -= (frame_crop_top_offset + frame_crop_bottom_offset); | |
226 | |
227 width_ = width; | |
228 height_ = height; | |
229 return true; | |
230 } | |
231 | |
232 } // namespace webrtc | |
OLD | NEW |