OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ | |
12 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ | |
13 | |
14 #include "webrtc/common_types.h" | |
15 #include "webrtc/typedefs.h" | |
16 | |
17 /******************************************************/ | |
18 /* Quality Modes: Resolution and Robustness settings */ | |
19 /******************************************************/ | |
20 | |
21 namespace webrtc { | |
22 struct VideoContentMetrics; | |
23 | |
// Resolution scale settings: codec frame size and frame rate, the spatial
// and temporal scale factors, and flags marking a spatial and/or temporal
// resolution change.
// A default-constructed instance is 640x480 at 30 fps with all factors set
// to 1.0 and no resolution change flagged.
struct VCMResolutionScale {
  VCMResolutionScale()
      : codec_width(640),
        codec_height(480),
        frame_rate(30.0f),
        spatial_width_fact(1.0f),
        spatial_height_fact(1.0f),
        temporal_fact(1.0f),
        change_resolution_spatial(false),
        change_resolution_temporal(false) {}

  // Codec frame size and frame rate.
  uint16_t codec_width;
  uint16_t codec_height;
  float frame_rate;

  // Scale factors for width, height and frame rate (1.0 by default).
  float spatial_width_fact;
  float spatial_height_fact;
  float temporal_fact;

  // Flags for a change in spatial / temporal resolution (false by default).
  bool change_resolution_spatial;
  bool change_resolution_temporal;
};
43 | |
// Image size classes, ordered by increasing pixel count.
// kNumImageTypes is the number of classes and must remain the last entry.
enum ImageType {
  kQCIF = 0,  // 176x144
  kHCIF,      // 264x216 = half(~3/4x3/4) CIF.
  kQVGA,      // 320x240 = quarter VGA.
  kCIF,       // 352x288
  kHVGA,      // 480x360 = half(~3/4x3/4) VGA.
  kVGA,       // 640x480
  kQFULLHD,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
  kWHD,       // 1280x720
  kFULLHD,    // 1920x1080
  kNumImageTypes
};
56 | |
57 const uint32_t kSizeOfImageType[kNumImageTypes] = { | |
58 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600}; | |
59 | |
// Frame rate level classes, listed from low to high.
enum FrameRateLevelClass {
  kFrameRateLow,
  kFrameRateMiddle1,
  kFrameRateMiddle2,
  kFrameRateHigh
};
66 | |
// Level classes for a content feature.
enum ContentLevelClass { kLow, kHigh, kDefault };
68 | |
69 struct VCMContFeature { | |
70 VCMContFeature() : value(0.0f), level(kDefault) {} | |
71 void Reset() { | |
72 value = 0.0f; | |
73 level = kDefault; | |
74 } | |
75 float value; | |
76 ContentLevelClass level; | |
77 }; | |
78 | |
// Direction of a resolution change.
enum UpDownAction { kUpResolution, kDownResolution };
80 | |
// Spatial down-sampling actions.
// kNumModesSpatial is the number of actions and must remain the last entry.
enum SpatialAction {
  kNoChangeSpatial,
  kOneHalfSpatialUniform,     // 3/4 x 3/4: 9/16 ~1/2 pixel reduction.
  kOneQuarterSpatialUniform,  // 1/2 x 1/2: 1/4 pixel reduction.
  kNumModesSpatial
};
87 | |
// Temporal (frame rate) down-sampling actions.
// kNumModesTemporal is the number of actions and must remain the last entry.
enum TemporalAction {
  kNoChangeTemporal,
  kTwoThirdsTemporal,  // Frame rate reduced to 2/3 (down-sampling factor 1.5).
  kOneHalfTemporal,    // Frame rate reduced to 1/2 (down-sampling factor 2).
  kNumModesTemporal
};
94 | |
95 struct ResolutionAction { | |
96 ResolutionAction() : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {} | |
97 SpatialAction spatial; | |
98 TemporalAction temporal; | |
99 }; | |
100 | |
101 // Down-sampling factors for spatial (width and height), and temporal. | |
102 const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; | |
103 | |
104 const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; | |
105 | |
106 const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f}; | |
107 | |
// State of the encoder with respect to its target rate and buffer level.
enum EncoderState {
  kStableEncoding,    // Low rate mis-match, stable buffer levels.
  kStressedEncoding,  // Significant over-shooting of target rate,
                      // Buffer under-flow, etc.
  kEasyEncoding       // Significant under-shooting of target rate.
};
114 | |
115 // QmMethod class: main class for resolution and robustness settings | |
116 | |
117 class VCMQmMethod { | |
118 public: | |
119 VCMQmMethod(); | |
120 virtual ~VCMQmMethod(); | |
121 | |
122 // Reset values | |
123 void ResetQM(); | |
124 virtual void Reset() = 0; | |
125 | |
126 // Compute content class. | |
127 uint8_t ComputeContentClass(); | |
128 | |
129 // Update with the content metrics. | |
130 void UpdateContent(const VideoContentMetrics* content_metrics); | |
131 | |
132 // Compute spatial texture magnitude and level. | |
133 // Spatial texture is a spatial prediction error measure. | |
134 void ComputeSpatial(); | |
135 | |
136 // Compute motion magnitude and level for NFD metric. | |
137 // NFD is normalized frame difference (normalized by spatial variance). | |
138 void ComputeMotionNFD(); | |
139 | |
140 // Get the imageType (CIF, VGA, HD, etc) for the system width/height. | |
141 ImageType GetImageType(uint16_t width, uint16_t height); | |
142 | |
143 // Return the closest image type. | |
144 ImageType FindClosestImageType(uint16_t width, uint16_t height); | |
145 | |
146 // Get the frame rate level. | |
147 FrameRateLevelClass FrameRateLevel(float frame_rate); | |
148 | |
149 protected: | |
150 // Content Data. | |
151 const VideoContentMetrics* content_metrics_; | |
152 | |
153 // Encoder frame sizes and native frame sizes. | |
154 uint16_t width_; | |
155 uint16_t height_; | |
156 float user_frame_rate_; | |
157 uint16_t native_width_; | |
158 uint16_t native_height_; | |
159 float native_frame_rate_; | |
160 float aspect_ratio_; | |
161 // Image type and frame rate leve, for the current encoder resolution. | |
162 ImageType image_type_; | |
163 FrameRateLevelClass framerate_level_; | |
164 // Content class data. | |
165 VCMContFeature motion_; | |
166 VCMContFeature spatial_; | |
167 uint8_t content_class_; | |
168 bool init_; | |
169 }; | |
170 | |
171 // Resolution settings class | |
172 | |
173 class VCMQmResolution : public VCMQmMethod { | |
174 public: | |
175 VCMQmResolution(); | |
176 virtual ~VCMQmResolution(); | |
177 | |
178 // Reset all quantities. | |
179 virtual void Reset(); | |
180 | |
181 // Reset rate quantities and counters after every SelectResolution() call. | |
182 void ResetRates(); | |
183 | |
184 // Reset down-sampling state. | |
185 void ResetDownSamplingState(); | |
186 | |
187 // Get the encoder state. | |
188 EncoderState GetEncoderState(); | |
189 | |
190 // Initialize after SetEncodingData in media_opt. | |
191 int Initialize(float bitrate, | |
192 float user_framerate, | |
193 uint16_t width, | |
194 uint16_t height, | |
195 int num_layers); | |
196 | |
197 // Update the encoder frame size. | |
198 void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height); | |
199 | |
200 // Update with actual bit rate (size of the latest encoded frame) | |
201 // and frame type, after every encoded frame. | |
202 void UpdateEncodedSize(size_t encoded_size); | |
203 | |
204 // Update with new target bitrate, actual encoder sent rate, frame_rate, | |
205 // loss rate: every ~1 sec from SetTargetRates in media_opt. | |
206 void UpdateRates(float target_bitrate, | |
207 float encoder_sent_rate, | |
208 float incoming_framerate, | |
209 uint8_t packet_loss); | |
210 | |
211 // Extract ST (spatio-temporal) resolution action. | |
212 // Inputs: qm: Reference to the quality modes pointer. | |
213 // Output: the spatial and/or temporal scale change. | |
214 int SelectResolution(VCMResolutionScale** qm); | |
215 | |
216 private: | |
217 // Set the default resolution action. | |
218 void SetDefaultAction(); | |
219 | |
220 // Compute rates for the selection of down-sampling action. | |
221 void ComputeRatesForSelection(); | |
222 | |
223 // Compute the encoder state. | |
224 void ComputeEncoderState(); | |
225 | |
226 // Return true if the action is to go back up in resolution. | |
227 bool GoingUpResolution(); | |
228 | |
229 // Return true if the action is to go down in resolution. | |
230 bool GoingDownResolution(); | |
231 | |
232 // Check the condition for going up in resolution by the scale factors: | |
233 // |facWidth|, |facHeight|, |facTemp|. | |
234 // |scaleFac| is a scale factor for the transition rate. | |
235 bool ConditionForGoingUp(float fac_width, | |
236 float fac_height, | |
237 float fac_temp, | |
238 float scale_fac); | |
239 | |
240 // Get the bitrate threshold for the resolution action. | |
241 // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action. | |
242 // |scaleFac| is a scale factor for the transition rate. | |
243 float GetTransitionRate(float fac_width, | |
244 float fac_height, | |
245 float fac_temp, | |
246 float scale_fac); | |
247 | |
248 // Update the down-sampling state. | |
249 void UpdateDownsamplingState(UpDownAction up_down); | |
250 | |
251 // Update the codec frame size and frame rate. | |
252 void UpdateCodecResolution(); | |
253 | |
254 // Return a state based on average target rate relative transition rate. | |
255 uint8_t RateClass(float transition_rate); | |
256 | |
257 // Adjust the action selected from the table. | |
258 void AdjustAction(); | |
259 | |
260 // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2. | |
261 void ConvertSpatialFractionalToWhole(); | |
262 | |
263 // Returns true if the new frame sizes, under the selected spatial action, | |
264 // are of even size. | |
265 bool EvenFrameSize(); | |
266 | |
267 // Insert latest down-sampling action into the history list. | |
268 void InsertLatestDownAction(); | |
269 | |
270 // Remove the last (first element) down-sampling action from the list. | |
271 void RemoveLastDownAction(); | |
272 | |
273 // Check constraints on the amount of down-sampling allowed. | |
274 void ConstrainAmountOfDownSampling(); | |
275 | |
276 // For going up in resolution: pick spatial or temporal action, | |
277 // if both actions were separately selected. | |
278 void PickSpatialOrTemporal(); | |
279 | |
280 // Select the directional (1x2 or 2x1) spatial down-sampling action. | |
281 void SelectSpatialDirectionMode(float transition_rate); | |
282 | |
283 enum { kDownActionHistorySize = 10 }; | |
284 | |
285 VCMResolutionScale* qm_; | |
286 // Encoder rate control parameters. | |
287 float target_bitrate_; | |
288 float incoming_framerate_; | |
289 float per_frame_bandwidth_; | |
290 float buffer_level_; | |
291 | |
292 // Data accumulated every ~1sec from MediaOpt. | |
293 float sum_target_rate_; | |
294 float sum_incoming_framerate_; | |
295 float sum_rate_MM_; | |
296 float sum_rate_MM_sgn_; | |
297 float sum_packet_loss_; | |
298 // Counters. | |
299 uint32_t frame_cnt_; | |
300 uint32_t frame_cnt_delta_; | |
301 uint32_t update_rate_cnt_; | |
302 uint32_t low_buffer_cnt_; | |
303 | |
304 // Resolution state parameters. | |
305 float state_dec_factor_spatial_; | |
306 float state_dec_factor_temporal_; | |
307 | |
308 // Quantities used for selection. | |
309 float avg_target_rate_; | |
310 float avg_incoming_framerate_; | |
311 float avg_ratio_buffer_low_; | |
312 float avg_rate_mismatch_; | |
313 float avg_rate_mismatch_sgn_; | |
314 float avg_packet_loss_; | |
315 EncoderState encoder_state_; | |
316 ResolutionAction action_; | |
317 // Short history of the down-sampling actions from the Initialize() state. | |
318 // This is needed for going up in resolution. Since the total amount of | |
319 // down-sampling actions are constrained, the length of the list need not be | |
320 // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample. | |
321 ResolutionAction down_action_history_[kDownActionHistorySize]; | |
322 int num_layers_; | |
323 }; | |
324 | |
325 } // namespace webrtc | |
326 #endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ | |
OLD | NEW |