OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/video_coding/qm_select.h" | |
12 | |
13 #include <math.h> | |
14 | |
15 #include "webrtc/modules/include/module_common_types.h" | |
16 #include "webrtc/modules/video_coding/include/video_coding_defines.h" | |
17 #include "webrtc/modules/video_coding/internal_defines.h" | |
18 #include "webrtc/modules/video_coding/qm_select_data.h" | |
19 #include "webrtc/system_wrappers/include/trace.h" | |
20 | |
21 namespace webrtc { | |
22 | |
23 // QM-METHOD class | |
24 | |
25 VCMQmMethod::VCMQmMethod() | |
26 : content_metrics_(NULL), | |
27 width_(0), | |
28 height_(0), | |
29 user_frame_rate_(0.0f), | |
30 native_width_(0), | |
31 native_height_(0), | |
32 native_frame_rate_(0.0f), | |
33 image_type_(kVGA), | |
34 framerate_level_(kFrameRateHigh), | |
35 init_(false) { | |
36 ResetQM(); | |
37 } | |
38 | |
39 VCMQmMethod::~VCMQmMethod() {} | |
40 | |
41 void VCMQmMethod::ResetQM() { | |
42 aspect_ratio_ = 1.0f; | |
43 motion_.Reset(); | |
44 spatial_.Reset(); | |
45 content_class_ = 0; | |
46 } | |
47 | |
48 uint8_t VCMQmMethod::ComputeContentClass() { | |
49 ComputeMotionNFD(); | |
50 ComputeSpatial(); | |
51 return content_class_ = 3 * motion_.level + spatial_.level; | |
52 } | |
53 | |
54 void VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) { | |
55 content_metrics_ = contentMetrics; | |
56 } | |
57 | |
58 void VCMQmMethod::ComputeMotionNFD() { | |
59 if (content_metrics_) { | |
60 motion_.value = content_metrics_->motion_magnitude; | |
61 } | |
62 // Determine motion level. | |
63 if (motion_.value < kLowMotionNfd) { | |
64 motion_.level = kLow; | |
65 } else if (motion_.value > kHighMotionNfd) { | |
66 motion_.level = kHigh; | |
67 } else { | |
68 motion_.level = kDefault; | |
69 } | |
70 } | |
71 | |
72 void VCMQmMethod::ComputeSpatial() { | |
73 float spatial_err = 0.0; | |
74 float spatial_err_h = 0.0; | |
75 float spatial_err_v = 0.0; | |
76 if (content_metrics_) { | |
77 spatial_err = content_metrics_->spatial_pred_err; | |
78 spatial_err_h = content_metrics_->spatial_pred_err_h; | |
79 spatial_err_v = content_metrics_->spatial_pred_err_v; | |
80 } | |
81 // Spatial measure: take average of 3 prediction errors. | |
82 spatial_.value = (spatial_err + spatial_err_h + spatial_err_v) / 3.0f; | |
83 | |
84 // Reduce thresholds for large scenes/higher pixel correlation. | |
85 float scale2 = image_type_ > kVGA ? kScaleTexture : 1.0; | |
86 | |
87 if (spatial_.value > scale2 * kHighTexture) { | |
88 spatial_.level = kHigh; | |
89 } else if (spatial_.value < scale2 * kLowTexture) { | |
90 spatial_.level = kLow; | |
91 } else { | |
92 spatial_.level = kDefault; | |
93 } | |
94 } | |
95 | |
96 ImageType VCMQmMethod::GetImageType(uint16_t width, uint16_t height) { | |
97 // Get the image type for the encoder frame size. | |
98 uint32_t image_size = width * height; | |
99 if (image_size == kSizeOfImageType[kQCIF]) { | |
100 return kQCIF; | |
101 } else if (image_size == kSizeOfImageType[kHCIF]) { | |
102 return kHCIF; | |
103 } else if (image_size == kSizeOfImageType[kQVGA]) { | |
104 return kQVGA; | |
105 } else if (image_size == kSizeOfImageType[kCIF]) { | |
106 return kCIF; | |
107 } else if (image_size == kSizeOfImageType[kHVGA]) { | |
108 return kHVGA; | |
109 } else if (image_size == kSizeOfImageType[kVGA]) { | |
110 return kVGA; | |
111 } else if (image_size == kSizeOfImageType[kQFULLHD]) { | |
112 return kQFULLHD; | |
113 } else if (image_size == kSizeOfImageType[kWHD]) { | |
114 return kWHD; | |
115 } else if (image_size == kSizeOfImageType[kFULLHD]) { | |
116 return kFULLHD; | |
117 } else { | |
118 // No exact match, find closet one. | |
119 return FindClosestImageType(width, height); | |
120 } | |
121 } | |
122 | |
123 ImageType VCMQmMethod::FindClosestImageType(uint16_t width, uint16_t height) { | |
124 float size = static_cast<float>(width * height); | |
125 float min = size; | |
126 int isel = 0; | |
127 for (int i = 0; i < kNumImageTypes; ++i) { | |
128 float dist = fabs(size - kSizeOfImageType[i]); | |
129 if (dist < min) { | |
130 min = dist; | |
131 isel = i; | |
132 } | |
133 } | |
134 return static_cast<ImageType>(isel); | |
135 } | |
136 | |
137 FrameRateLevelClass VCMQmMethod::FrameRateLevel(float avg_framerate) { | |
138 if (avg_framerate <= kLowFrameRate) { | |
139 return kFrameRateLow; | |
140 } else if (avg_framerate <= kMiddleFrameRate) { | |
141 return kFrameRateMiddle1; | |
142 } else if (avg_framerate <= kHighFrameRate) { | |
143 return kFrameRateMiddle2; | |
144 } else { | |
145 return kFrameRateHigh; | |
146 } | |
147 } | |
148 | |
149 // RESOLUTION CLASS | |
150 | |
151 VCMQmResolution::VCMQmResolution() : qm_(new VCMResolutionScale()) { | |
152 Reset(); | |
153 } | |
154 | |
155 VCMQmResolution::~VCMQmResolution() { | |
156 delete qm_; | |
157 } | |
158 | |
159 void VCMQmResolution::ResetRates() { | |
160 sum_target_rate_ = 0.0f; | |
161 sum_incoming_framerate_ = 0.0f; | |
162 sum_rate_MM_ = 0.0f; | |
163 sum_rate_MM_sgn_ = 0.0f; | |
164 sum_packet_loss_ = 0.0f; | |
165 buffer_level_ = kInitBufferLevel * target_bitrate_; | |
166 frame_cnt_ = 0; | |
167 frame_cnt_delta_ = 0; | |
168 low_buffer_cnt_ = 0; | |
169 update_rate_cnt_ = 0; | |
170 } | |
171 | |
172 void VCMQmResolution::ResetDownSamplingState() { | |
173 state_dec_factor_spatial_ = 1.0; | |
174 state_dec_factor_temporal_ = 1.0; | |
175 for (int i = 0; i < kDownActionHistorySize; i++) { | |
176 down_action_history_[i].spatial = kNoChangeSpatial; | |
177 down_action_history_[i].temporal = kNoChangeTemporal; | |
178 } | |
179 } | |
180 | |
181 void VCMQmResolution::Reset() { | |
182 target_bitrate_ = 0.0f; | |
183 incoming_framerate_ = 0.0f; | |
184 buffer_level_ = 0.0f; | |
185 per_frame_bandwidth_ = 0.0f; | |
186 avg_target_rate_ = 0.0f; | |
187 avg_incoming_framerate_ = 0.0f; | |
188 avg_ratio_buffer_low_ = 0.0f; | |
189 avg_rate_mismatch_ = 0.0f; | |
190 avg_rate_mismatch_sgn_ = 0.0f; | |
191 avg_packet_loss_ = 0.0f; | |
192 encoder_state_ = kStableEncoding; | |
193 num_layers_ = 1; | |
194 ResetRates(); | |
195 ResetDownSamplingState(); | |
196 ResetQM(); | |
197 } | |
198 | |
199 EncoderState VCMQmResolution::GetEncoderState() { | |
200 return encoder_state_; | |
201 } | |
202 | |
203 // Initialize state after re-initializing the encoder, | |
204 // i.e., after SetEncodingData() in mediaOpt. | |
205 int VCMQmResolution::Initialize(float bitrate, | |
206 float user_framerate, | |
207 uint16_t width, | |
208 uint16_t height, | |
209 int num_layers) { | |
210 if (user_framerate == 0.0f || width == 0 || height == 0) { | |
211 return VCM_PARAMETER_ERROR; | |
212 } | |
213 Reset(); | |
214 target_bitrate_ = bitrate; | |
215 incoming_framerate_ = user_framerate; | |
216 UpdateCodecParameters(user_framerate, width, height); | |
217 native_width_ = width; | |
218 native_height_ = height; | |
219 native_frame_rate_ = user_framerate; | |
220 num_layers_ = num_layers; | |
221 // Initial buffer level. | |
222 buffer_level_ = kInitBufferLevel * target_bitrate_; | |
223 // Per-frame bandwidth. | |
224 per_frame_bandwidth_ = target_bitrate_ / user_framerate; | |
225 init_ = true; | |
226 return VCM_OK; | |
227 } | |
228 | |
229 void VCMQmResolution::UpdateCodecParameters(float frame_rate, | |
230 uint16_t width, | |
231 uint16_t height) { | |
232 width_ = width; | |
233 height_ = height; | |
234 // |user_frame_rate| is the target frame rate for VPM frame dropper. | |
235 user_frame_rate_ = frame_rate; | |
236 image_type_ = GetImageType(width, height); | |
237 } | |
238 | |
239 // Update rate data after every encoded frame. | |
240 void VCMQmResolution::UpdateEncodedSize(size_t encoded_size) { | |
241 frame_cnt_++; | |
242 // Convert to Kbps. | |
243 float encoded_size_kbits = 8.0f * static_cast<float>(encoded_size) / 1000.0f; | |
244 | |
245 // Update the buffer level: | |
246 // Note this is not the actual encoder buffer level. | |
247 // |buffer_level_| is reset to an initial value after SelectResolution is | |
248 // called, and does not account for frame dropping by encoder or VCM. | |
249 buffer_level_ += per_frame_bandwidth_ - encoded_size_kbits; | |
250 | |
251 // Counter for occurrences of low buffer level: | |
252 // low/negative values means encoder is likely dropping frames. | |
253 if (buffer_level_ <= kPercBufferThr * kInitBufferLevel * target_bitrate_) { | |
254 low_buffer_cnt_++; | |
255 } | |
256 } | |
257 | |
258 // Update various quantities after SetTargetRates in MediaOpt. | |
259 void VCMQmResolution::UpdateRates(float target_bitrate, | |
260 float encoder_sent_rate, | |
261 float incoming_framerate, | |
262 uint8_t packet_loss) { | |
263 // Sum the target bitrate: this is the encoder rate from previous update | |
264 // (~1sec), i.e, before the update for next ~1sec. | |
265 sum_target_rate_ += target_bitrate_; | |
266 update_rate_cnt_++; | |
267 | |
268 // Sum the received (from RTCP reports) packet loss rates. | |
269 sum_packet_loss_ += static_cast<float>(packet_loss / 255.0); | |
270 | |
271 // Sum the sequence rate mismatch: | |
272 // Mismatch here is based on the difference between the target rate | |
273 // used (in previous ~1sec) and the average actual encoding rate measured | |
274 // at previous ~1sec. | |
275 float diff = target_bitrate_ - encoder_sent_rate; | |
276 if (target_bitrate_ > 0.0) | |
277 sum_rate_MM_ += fabs(diff) / target_bitrate_; | |
278 int sgnDiff = diff > 0 ? 1 : (diff < 0 ? -1 : 0); | |
279 // To check for consistent under(+)/over_shooting(-) of target rate. | |
280 sum_rate_MM_sgn_ += sgnDiff; | |
281 | |
282 // Update with the current new target and frame rate: | |
283 // these values are ones the encoder will use for the current/next ~1sec. | |
284 target_bitrate_ = target_bitrate; | |
285 incoming_framerate_ = incoming_framerate; | |
286 sum_incoming_framerate_ += incoming_framerate_; | |
287 // Update the per_frame_bandwidth: | |
288 // this is the per_frame_bw for the current/next ~1sec. | |
289 per_frame_bandwidth_ = 0.0f; | |
290 if (incoming_framerate_ > 0.0f) { | |
291 per_frame_bandwidth_ = target_bitrate_ / incoming_framerate_; | |
292 } | |
293 } | |
294 | |
295 // Select the resolution factors: frame size and frame rate change (qm scales). | |
296 // Selection is for going down in resolution, or for going back up | |
297 // (if a previous down-sampling action was taken). | |
298 | |
299 // In the current version the following constraints are imposed: | |
300 // 1) We only allow for one action, either down or up, at a given time. | |
301 // 2) The possible down-sampling actions are: spatial by 1/2x1/2, 3/4x3/4; | |
302 // temporal/frame rate reduction by 1/2 and 2/3. | |
303 // 3) The action for going back up is the reverse of last (spatial or temporal) | |
304 // down-sampling action. The list of down-sampling actions from the | |
305 // Initialize() state are kept in |down_action_history_|. | |
306 // 4) The total amount of down-sampling (spatial and/or temporal) from the | |
307 // Initialize() state (native resolution) is limited by various factors. | |
308 int VCMQmResolution::SelectResolution(VCMResolutionScale** qm) { | |
309 if (!init_) { | |
310 return VCM_UNINITIALIZED; | |
311 } | |
312 if (content_metrics_ == NULL) { | |
313 Reset(); | |
314 *qm = qm_; | |
315 return VCM_OK; | |
316 } | |
317 | |
318 // Check conditions on down-sampling state. | |
319 assert(state_dec_factor_spatial_ >= 1.0f); | |
320 assert(state_dec_factor_temporal_ >= 1.0f); | |
321 assert(state_dec_factor_spatial_ <= kMaxSpatialDown); | |
322 assert(state_dec_factor_temporal_ <= kMaxTempDown); | |
323 assert(state_dec_factor_temporal_ * state_dec_factor_spatial_ <= | |
324 kMaxTotalDown); | |
325 | |
326 // Compute content class for selection. | |
327 content_class_ = ComputeContentClass(); | |
328 // Compute various rate quantities for selection. | |
329 ComputeRatesForSelection(); | |
330 | |
331 // Get the encoder state. | |
332 ComputeEncoderState(); | |
333 | |
334 // Default settings: no action. | |
335 SetDefaultAction(); | |
336 *qm = qm_; | |
337 | |
338 // Check for going back up in resolution, if we have had some down-sampling | |
339 // relative to native state in Initialize(). | |
340 if (down_action_history_[0].spatial != kNoChangeSpatial || | |
341 down_action_history_[0].temporal != kNoChangeTemporal) { | |
342 if (GoingUpResolution()) { | |
343 *qm = qm_; | |
344 return VCM_OK; | |
345 } | |
346 } | |
347 | |
348 // Check for going down in resolution. | |
349 if (GoingDownResolution()) { | |
350 *qm = qm_; | |
351 return VCM_OK; | |
352 } | |
353 return VCM_OK; | |
354 } | |
355 | |
356 void VCMQmResolution::SetDefaultAction() { | |
357 qm_->codec_width = width_; | |
358 qm_->codec_height = height_; | |
359 qm_->frame_rate = user_frame_rate_; | |
360 qm_->change_resolution_spatial = false; | |
361 qm_->change_resolution_temporal = false; | |
362 qm_->spatial_width_fact = 1.0f; | |
363 qm_->spatial_height_fact = 1.0f; | |
364 qm_->temporal_fact = 1.0f; | |
365 action_.spatial = kNoChangeSpatial; | |
366 action_.temporal = kNoChangeTemporal; | |
367 } | |
368 | |
369 void VCMQmResolution::ComputeRatesForSelection() { | |
370 avg_target_rate_ = 0.0f; | |
371 avg_incoming_framerate_ = 0.0f; | |
372 avg_ratio_buffer_low_ = 0.0f; | |
373 avg_rate_mismatch_ = 0.0f; | |
374 avg_rate_mismatch_sgn_ = 0.0f; | |
375 avg_packet_loss_ = 0.0f; | |
376 if (frame_cnt_ > 0) { | |
377 avg_ratio_buffer_low_ = | |
378 static_cast<float>(low_buffer_cnt_) / static_cast<float>(frame_cnt_); | |
379 } | |
380 if (update_rate_cnt_ > 0) { | |
381 avg_rate_mismatch_ = | |
382 static_cast<float>(sum_rate_MM_) / static_cast<float>(update_rate_cnt_); | |
383 avg_rate_mismatch_sgn_ = static_cast<float>(sum_rate_MM_sgn_) / | |
384 static_cast<float>(update_rate_cnt_); | |
385 avg_target_rate_ = static_cast<float>(sum_target_rate_) / | |
386 static_cast<float>(update_rate_cnt_); | |
387 avg_incoming_framerate_ = static_cast<float>(sum_incoming_framerate_) / | |
388 static_cast<float>(update_rate_cnt_); | |
389 avg_packet_loss_ = static_cast<float>(sum_packet_loss_) / | |
390 static_cast<float>(update_rate_cnt_); | |
391 } | |
392 // For selection we may want to weight some quantities more heavily | |
393 // with the current (i.e., next ~1sec) rate values. | |
394 avg_target_rate_ = | |
395 kWeightRate * avg_target_rate_ + (1.0 - kWeightRate) * target_bitrate_; | |
396 avg_incoming_framerate_ = kWeightRate * avg_incoming_framerate_ + | |
397 (1.0 - kWeightRate) * incoming_framerate_; | |
398 // Use base layer frame rate for temporal layers: this will favor spatial. | |
399 assert(num_layers_ > 0); | |
400 framerate_level_ = FrameRateLevel(avg_incoming_framerate_ / | |
401 static_cast<float>(1 << (num_layers_ - 1))); | |
402 } | |
403 | |
404 void VCMQmResolution::ComputeEncoderState() { | |
405 // Default. | |
406 encoder_state_ = kStableEncoding; | |
407 | |
408 // Assign stressed state if: | |
409 // 1) occurrences of low buffer levels is high, or | |
410 // 2) rate mis-match is high, and consistent over-shooting by encoder. | |
411 if ((avg_ratio_buffer_low_ > kMaxBufferLow) || | |
412 ((avg_rate_mismatch_ > kMaxRateMisMatch) && | |
413 (avg_rate_mismatch_sgn_ < -kRateOverShoot))) { | |
414 encoder_state_ = kStressedEncoding; | |
415 } | |
416 // Assign easy state if: | |
417 // 1) rate mis-match is high, and | |
418 // 2) consistent under-shooting by encoder. | |
419 if ((avg_rate_mismatch_ > kMaxRateMisMatch) && | |
420 (avg_rate_mismatch_sgn_ > kRateUnderShoot)) { | |
421 encoder_state_ = kEasyEncoding; | |
422 } | |
423 } | |
424 | |
425 bool VCMQmResolution::GoingUpResolution() { | |
426 // For going up, we check for undoing the previous down-sampling action. | |
427 | |
428 float fac_width = kFactorWidthSpatial[down_action_history_[0].spatial]; | |
429 float fac_height = kFactorHeightSpatial[down_action_history_[0].spatial]; | |
430 float fac_temp = kFactorTemporal[down_action_history_[0].temporal]; | |
431 // For going up spatially, we allow for going up by 3/4x3/4 at each stage. | |
432 // So if the last spatial action was 1/2x1/2 it would be undone in 2 stages. | |
433 // Modify the fac_width/height for this case. | |
434 if (down_action_history_[0].spatial == kOneQuarterSpatialUniform) { | |
435 fac_width = kFactorWidthSpatial[kOneQuarterSpatialUniform] / | |
436 kFactorWidthSpatial[kOneHalfSpatialUniform]; | |
437 fac_height = kFactorHeightSpatial[kOneQuarterSpatialUniform] / | |
438 kFactorHeightSpatial[kOneHalfSpatialUniform]; | |
439 } | |
440 | |
441 // Check if we should go up both spatially and temporally. | |
442 if (down_action_history_[0].spatial != kNoChangeSpatial && | |
443 down_action_history_[0].temporal != kNoChangeTemporal) { | |
444 if (ConditionForGoingUp(fac_width, fac_height, fac_temp, | |
445 kTransRateScaleUpSpatialTemp)) { | |
446 action_.spatial = down_action_history_[0].spatial; | |
447 action_.temporal = down_action_history_[0].temporal; | |
448 UpdateDownsamplingState(kUpResolution); | |
449 return true; | |
450 } | |
451 } | |
452 // Check if we should go up either spatially or temporally. | |
453 bool selected_up_spatial = false; | |
454 bool selected_up_temporal = false; | |
455 if (down_action_history_[0].spatial != kNoChangeSpatial) { | |
456 selected_up_spatial = ConditionForGoingUp(fac_width, fac_height, 1.0f, | |
457 kTransRateScaleUpSpatial); | |
458 } | |
459 if (down_action_history_[0].temporal != kNoChangeTemporal) { | |
460 selected_up_temporal = | |
461 ConditionForGoingUp(1.0f, 1.0f, fac_temp, kTransRateScaleUpTemp); | |
462 } | |
463 if (selected_up_spatial && !selected_up_temporal) { | |
464 action_.spatial = down_action_history_[0].spatial; | |
465 action_.temporal = kNoChangeTemporal; | |
466 UpdateDownsamplingState(kUpResolution); | |
467 return true; | |
468 } else if (!selected_up_spatial && selected_up_temporal) { | |
469 action_.spatial = kNoChangeSpatial; | |
470 action_.temporal = down_action_history_[0].temporal; | |
471 UpdateDownsamplingState(kUpResolution); | |
472 return true; | |
473 } else if (selected_up_spatial && selected_up_temporal) { | |
474 PickSpatialOrTemporal(); | |
475 UpdateDownsamplingState(kUpResolution); | |
476 return true; | |
477 } | |
478 return false; | |
479 } | |
480 | |
481 bool VCMQmResolution::ConditionForGoingUp(float fac_width, | |
482 float fac_height, | |
483 float fac_temp, | |
484 float scale_fac) { | |
485 float estimated_transition_rate_up = | |
486 GetTransitionRate(fac_width, fac_height, fac_temp, scale_fac); | |
487 // Go back up if: | |
488 // 1) target rate is above threshold and current encoder state is stable, or | |
489 // 2) encoder state is easy (encoder is significantly under-shooting target). | |
490 if (((avg_target_rate_ > estimated_transition_rate_up) && | |
491 (encoder_state_ == kStableEncoding)) || | |
492 (encoder_state_ == kEasyEncoding)) { | |
493 return true; | |
494 } else { | |
495 return false; | |
496 } | |
497 } | |
498 | |
499 bool VCMQmResolution::GoingDownResolution() { | |
500 float estimated_transition_rate_down = | |
501 GetTransitionRate(1.0f, 1.0f, 1.0f, 1.0f); | |
502 float max_rate = kFrameRateFac[framerate_level_] * kMaxRateQm[image_type_]; | |
503 // Resolution reduction if: | |
504 // (1) target rate is below transition rate, or | |
505 // (2) encoder is in stressed state and target rate below a max threshold. | |
506 if ((avg_target_rate_ < estimated_transition_rate_down) || | |
507 (encoder_state_ == kStressedEncoding && avg_target_rate_ < max_rate)) { | |
508 // Get the down-sampling action: based on content class, and how low | |
509 // average target rate is relative to transition rate. | |
510 uint8_t spatial_fact = | |
511 kSpatialAction[content_class_ + | |
512 9 * RateClass(estimated_transition_rate_down)]; | |
513 uint8_t temp_fact = | |
514 kTemporalAction[content_class_ + | |
515 9 * RateClass(estimated_transition_rate_down)]; | |
516 | |
517 switch (spatial_fact) { | |
518 case 4: { | |
519 action_.spatial = kOneQuarterSpatialUniform; | |
520 break; | |
521 } | |
522 case 2: { | |
523 action_.spatial = kOneHalfSpatialUniform; | |
524 break; | |
525 } | |
526 case 1: { | |
527 action_.spatial = kNoChangeSpatial; | |
528 break; | |
529 } | |
530 default: { assert(false); } | |
531 } | |
532 switch (temp_fact) { | |
533 case 3: { | |
534 action_.temporal = kTwoThirdsTemporal; | |
535 break; | |
536 } | |
537 case 2: { | |
538 action_.temporal = kOneHalfTemporal; | |
539 break; | |
540 } | |
541 case 1: { | |
542 action_.temporal = kNoChangeTemporal; | |
543 break; | |
544 } | |
545 default: { assert(false); } | |
546 } | |
547 // Only allow for one action (spatial or temporal) at a given time. | |
548 assert(action_.temporal == kNoChangeTemporal || | |
549 action_.spatial == kNoChangeSpatial); | |
550 | |
551 // Adjust cases not captured in tables, mainly based on frame rate, and | |
552 // also check for odd frame sizes. | |
553 AdjustAction(); | |
554 | |
555 // Update down-sampling state. | |
556 if (action_.spatial != kNoChangeSpatial || | |
557 action_.temporal != kNoChangeTemporal) { | |
558 UpdateDownsamplingState(kDownResolution); | |
559 return true; | |
560 } | |
561 } | |
562 return false; | |
563 } | |
564 | |
565 float VCMQmResolution::GetTransitionRate(float fac_width, | |
566 float fac_height, | |
567 float fac_temp, | |
568 float scale_fac) { | |
569 ImageType image_type = | |
570 GetImageType(static_cast<uint16_t>(fac_width * width_), | |
571 static_cast<uint16_t>(fac_height * height_)); | |
572 | |
573 FrameRateLevelClass framerate_level = | |
574 FrameRateLevel(fac_temp * avg_incoming_framerate_); | |
575 // If we are checking for going up temporally, and this is the last | |
576 // temporal action, then use native frame rate. | |
577 if (down_action_history_[1].temporal == kNoChangeTemporal && | |
578 fac_temp > 1.0f) { | |
579 framerate_level = FrameRateLevel(native_frame_rate_); | |
580 } | |
581 | |
582 // The maximum allowed rate below which down-sampling is allowed: | |
583 // Nominal values based on image format (frame size and frame rate). | |
584 float max_rate = kFrameRateFac[framerate_level] * kMaxRateQm[image_type]; | |
585 | |
586 uint8_t image_class = image_type > kVGA ? 1 : 0; | |
587 uint8_t table_index = image_class * 9 + content_class_; | |
588 // Scale factor for down-sampling transition threshold: | |
589 // factor based on the content class and the image size. | |
590 float scaleTransRate = kScaleTransRateQm[table_index]; | |
591 // Threshold bitrate for resolution action. | |
592 return static_cast<float>(scale_fac * scaleTransRate * max_rate); | |
593 } | |
594 | |
595 void VCMQmResolution::UpdateDownsamplingState(UpDownAction up_down) { | |
596 if (up_down == kUpResolution) { | |
597 qm_->spatial_width_fact = 1.0f / kFactorWidthSpatial[action_.spatial]; | |
598 qm_->spatial_height_fact = 1.0f / kFactorHeightSpatial[action_.spatial]; | |
599 // If last spatial action was 1/2x1/2, we undo it in two steps, so the | |
600 // spatial scale factor in this first step is modified as (4.0/3.0 / 2.0). | |
601 if (action_.spatial == kOneQuarterSpatialUniform) { | |
602 qm_->spatial_width_fact = 1.0f * | |
603 kFactorWidthSpatial[kOneHalfSpatialUniform] / | |
604 kFactorWidthSpatial[kOneQuarterSpatialUniform]; | |
605 qm_->spatial_height_fact = | |
606 1.0f * kFactorHeightSpatial[kOneHalfSpatialUniform] / | |
607 kFactorHeightSpatial[kOneQuarterSpatialUniform]; | |
608 } | |
609 qm_->temporal_fact = 1.0f / kFactorTemporal[action_.temporal]; | |
610 RemoveLastDownAction(); | |
611 } else if (up_down == kDownResolution) { | |
612 ConstrainAmountOfDownSampling(); | |
613 ConvertSpatialFractionalToWhole(); | |
614 qm_->spatial_width_fact = kFactorWidthSpatial[action_.spatial]; | |
615 qm_->spatial_height_fact = kFactorHeightSpatial[action_.spatial]; | |
616 qm_->temporal_fact = kFactorTemporal[action_.temporal]; | |
617 InsertLatestDownAction(); | |
618 } else { | |
619 // This function should only be called if either the Up or Down action | |
620 // has been selected. | |
621 assert(false); | |
622 } | |
623 UpdateCodecResolution(); | |
624 state_dec_factor_spatial_ = state_dec_factor_spatial_ * | |
625 qm_->spatial_width_fact * | |
626 qm_->spatial_height_fact; | |
627 state_dec_factor_temporal_ = state_dec_factor_temporal_ * qm_->temporal_fact; | |
628 } | |
629 | |
630 void VCMQmResolution::UpdateCodecResolution() { | |
631 if (action_.spatial != kNoChangeSpatial) { | |
632 qm_->change_resolution_spatial = true; | |
633 qm_->codec_width = | |
634 static_cast<uint16_t>(width_ / qm_->spatial_width_fact + 0.5f); | |
635 qm_->codec_height = | |
636 static_cast<uint16_t>(height_ / qm_->spatial_height_fact + 0.5f); | |
637 // Size should not exceed native sizes. | |
638 assert(qm_->codec_width <= native_width_); | |
639 assert(qm_->codec_height <= native_height_); | |
640 // New sizes should be multiple of 2, otherwise spatial should not have | |
641 // been selected. | |
642 assert(qm_->codec_width % 2 == 0); | |
643 assert(qm_->codec_height % 2 == 0); | |
644 } | |
645 if (action_.temporal != kNoChangeTemporal) { | |
646 qm_->change_resolution_temporal = true; | |
647 // Update the frame rate based on the average incoming frame rate. | |
648 qm_->frame_rate = avg_incoming_framerate_ / qm_->temporal_fact + 0.5f; | |
649 if (down_action_history_[0].temporal == 0) { | |
650 // When we undo the last temporal-down action, make sure we go back up | |
651 // to the native frame rate. Since the incoming frame rate may | |
652 // fluctuate over time, |avg_incoming_framerate_| scaled back up may | |
653 // be smaller than |native_frame rate_|. | |
654 qm_->frame_rate = native_frame_rate_; | |
655 } | |
656 } | |
657 } | |
658 | |
659 uint8_t VCMQmResolution::RateClass(float transition_rate) { | |
660 return avg_target_rate_ < (kFacLowRate * transition_rate) | |
661 ? 0 | |
662 : (avg_target_rate_ >= transition_rate ? 2 : 1); | |
663 } | |
664 | |
665 // TODO(marpan): Would be better to capture these frame rate adjustments by | |
666 // extending the table data (qm_select_data.h). | |
667 void VCMQmResolution::AdjustAction() { | |
668 // If the spatial level is default state (neither low or high), motion level | |
669 // is not high, and spatial action was selected, switch to 2/3 frame rate | |
670 // reduction if the average incoming frame rate is high. | |
671 if (spatial_.level == kDefault && motion_.level != kHigh && | |
672 action_.spatial != kNoChangeSpatial && | |
673 framerate_level_ == kFrameRateHigh) { | |
674 action_.spatial = kNoChangeSpatial; | |
675 action_.temporal = kTwoThirdsTemporal; | |
676 } | |
677 // If both motion and spatial level are low, and temporal down action was | |
678 // selected, switch to spatial 3/4x3/4 if the frame rate is not above the | |
679 // lower middle level (|kFrameRateMiddle1|). | |
680 if (motion_.level == kLow && spatial_.level == kLow && | |
681 framerate_level_ <= kFrameRateMiddle1 && | |
682 action_.temporal != kNoChangeTemporal) { | |
683 action_.spatial = kOneHalfSpatialUniform; | |
684 action_.temporal = kNoChangeTemporal; | |
685 } | |
686 // If spatial action is selected, and there has been too much spatial | |
687 // reduction already (i.e., 1/4), then switch to temporal action if the | |
688 // average frame rate is not low. | |
689 if (action_.spatial != kNoChangeSpatial && | |
690 down_action_history_[0].spatial == kOneQuarterSpatialUniform && | |
691 framerate_level_ != kFrameRateLow) { | |
692 action_.spatial = kNoChangeSpatial; | |
693 action_.temporal = kTwoThirdsTemporal; | |
694 } | |
695 // Never use temporal action if number of temporal layers is above 2. | |
696 if (num_layers_ > 2) { | |
697 if (action_.temporal != kNoChangeTemporal) { | |
698 action_.spatial = kOneHalfSpatialUniform; | |
699 } | |
700 action_.temporal = kNoChangeTemporal; | |
701 } | |
702 // If spatial action was selected, we need to make sure the frame sizes | |
703 // are multiples of two. Otherwise switch to 2/3 temporal. | |
704 if (action_.spatial != kNoChangeSpatial && !EvenFrameSize()) { | |
705 action_.spatial = kNoChangeSpatial; | |
706 // Only one action (spatial or temporal) is allowed at a given time, so need | |
707 // to check whether temporal action is currently selected. | |
708 action_.temporal = kTwoThirdsTemporal; | |
709 } | |
710 } | |
711 | |
712 void VCMQmResolution::ConvertSpatialFractionalToWhole() { | |
713 // If 3/4 spatial is selected, check if there has been another 3/4, | |
714 // and if so, combine them into 1/2. 1/2 scaling is more efficient than 9/16. | |
715 // Note we define 3/4x3/4 spatial as kOneHalfSpatialUniform. | |
716 if (action_.spatial == kOneHalfSpatialUniform) { | |
717 bool found = false; | |
718 int isel = kDownActionHistorySize; | |
719 for (int i = 0; i < kDownActionHistorySize; ++i) { | |
720 if (down_action_history_[i].spatial == kOneHalfSpatialUniform) { | |
721 isel = i; | |
722 found = true; | |
723 break; | |
724 } | |
725 } | |
726 if (found) { | |
727 action_.spatial = kOneQuarterSpatialUniform; | |
728 state_dec_factor_spatial_ = | |
729 state_dec_factor_spatial_ / | |
730 (kFactorWidthSpatial[kOneHalfSpatialUniform] * | |
731 kFactorHeightSpatial[kOneHalfSpatialUniform]); | |
732 // Check if switching to 1/2x1/2 (=1/4) spatial is allowed. | |
733 ConstrainAmountOfDownSampling(); | |
734 if (action_.spatial == kNoChangeSpatial) { | |
735 // Not allowed. Go back to 3/4x3/4 spatial. | |
736 action_.spatial = kOneHalfSpatialUniform; | |
737 state_dec_factor_spatial_ = | |
738 state_dec_factor_spatial_ * | |
739 kFactorWidthSpatial[kOneHalfSpatialUniform] * | |
740 kFactorHeightSpatial[kOneHalfSpatialUniform]; | |
741 } else { | |
742 // Switching is allowed. Remove 3/4x3/4 from the history, and update | |
743 // the frame size. | |
744 for (int i = isel; i < kDownActionHistorySize - 1; ++i) { | |
745 down_action_history_[i].spatial = down_action_history_[i + 1].spatial; | |
746 } | |
747 width_ = width_ * kFactorWidthSpatial[kOneHalfSpatialUniform]; | |
748 height_ = height_ * kFactorHeightSpatial[kOneHalfSpatialUniform]; | |
749 } | |
750 } | |
751 } | |
752 } | |
753 | |
754 // Returns false if the new frame sizes, under the current spatial action, | |
755 // are not multiples of two. | |
756 bool VCMQmResolution::EvenFrameSize() { | |
757 if (action_.spatial == kOneHalfSpatialUniform) { | |
758 if ((width_ * 3 / 4) % 2 != 0 || (height_ * 3 / 4) % 2 != 0) { | |
759 return false; | |
760 } | |
761 } else if (action_.spatial == kOneQuarterSpatialUniform) { | |
762 if ((width_ * 1 / 2) % 2 != 0 || (height_ * 1 / 2) % 2 != 0) { | |
763 return false; | |
764 } | |
765 } | |
766 return true; | |
767 } | |
768 | |
769 void VCMQmResolution::InsertLatestDownAction() { | |
770 if (action_.spatial != kNoChangeSpatial) { | |
771 for (int i = kDownActionHistorySize - 1; i > 0; --i) { | |
772 down_action_history_[i].spatial = down_action_history_[i - 1].spatial; | |
773 } | |
774 down_action_history_[0].spatial = action_.spatial; | |
775 } | |
776 if (action_.temporal != kNoChangeTemporal) { | |
777 for (int i = kDownActionHistorySize - 1; i > 0; --i) { | |
778 down_action_history_[i].temporal = down_action_history_[i - 1].temporal; | |
779 } | |
780 down_action_history_[0].temporal = action_.temporal; | |
781 } | |
782 } | |
783 | |
784 void VCMQmResolution::RemoveLastDownAction() { | |
785 if (action_.spatial != kNoChangeSpatial) { | |
786 // If the last spatial action was 1/2x1/2 we replace it with 3/4x3/4. | |
787 if (action_.spatial == kOneQuarterSpatialUniform) { | |
788 down_action_history_[0].spatial = kOneHalfSpatialUniform; | |
789 } else { | |
790 for (int i = 0; i < kDownActionHistorySize - 1; ++i) { | |
791 down_action_history_[i].spatial = down_action_history_[i + 1].spatial; | |
792 } | |
793 down_action_history_[kDownActionHistorySize - 1].spatial = | |
794 kNoChangeSpatial; | |
795 } | |
796 } | |
797 if (action_.temporal != kNoChangeTemporal) { | |
798 for (int i = 0; i < kDownActionHistorySize - 1; ++i) { | |
799 down_action_history_[i].temporal = down_action_history_[i + 1].temporal; | |
800 } | |
801 down_action_history_[kDownActionHistorySize - 1].temporal = | |
802 kNoChangeTemporal; | |
803 } | |
804 } | |
805 | |
806 void VCMQmResolution::ConstrainAmountOfDownSampling() { | |
807 // Sanity checks on down-sampling selection: | |
808 // override the settings for too small image size and/or frame rate. | |
809 // Also check the limit on current down-sampling states. | |
810 | |
811 float spatial_width_fact = kFactorWidthSpatial[action_.spatial]; | |
812 float spatial_height_fact = kFactorHeightSpatial[action_.spatial]; | |
813 float temporal_fact = kFactorTemporal[action_.temporal]; | |
814 float new_dec_factor_spatial = | |
815 state_dec_factor_spatial_ * spatial_width_fact * spatial_height_fact; | |
816 float new_dec_factor_temp = state_dec_factor_temporal_ * temporal_fact; | |
817 | |
818 // No spatial sampling if current frame size is too small, or if the | |
819 // amount of spatial down-sampling is above maximum spatial down-action. | |
820 if ((width_ * height_) <= kMinImageSize || | |
821 new_dec_factor_spatial > kMaxSpatialDown) { | |
822 action_.spatial = kNoChangeSpatial; | |
823 new_dec_factor_spatial = state_dec_factor_spatial_; | |
824 } | |
825 // No frame rate reduction if average frame rate is below some point, or if | |
826 // the amount of temporal down-sampling is above maximum temporal down-action. | |
827 if (avg_incoming_framerate_ <= kMinFrameRate || | |
828 new_dec_factor_temp > kMaxTempDown) { | |
829 action_.temporal = kNoChangeTemporal; | |
830 new_dec_factor_temp = state_dec_factor_temporal_; | |
831 } | |
832 // Check if the total (spatial-temporal) down-action is above maximum allowed, | |
833 // if so, disallow the current selected down-action. | |
834 if (new_dec_factor_spatial * new_dec_factor_temp > kMaxTotalDown) { | |
835 if (action_.spatial != kNoChangeSpatial) { | |
836 action_.spatial = kNoChangeSpatial; | |
837 } else if (action_.temporal != kNoChangeTemporal) { | |
838 action_.temporal = kNoChangeTemporal; | |
839 } else { | |
840 // We only allow for one action (spatial or temporal) at a given time, so | |
841 // either spatial or temporal action is selected when this function is | |
842 // called. If the selected action is disallowed from one of the above | |
843 // 2 prior conditions (on spatial & temporal max down-action), then this | |
844 // condition "total down-action > |kMaxTotalDown|" would not be entered. | |
845 assert(false); | |
846 } | |
847 } | |
848 } | |
849 | |
850 void VCMQmResolution::PickSpatialOrTemporal() { | |
851 // Pick the one that has had the most down-sampling thus far. | |
852 if (state_dec_factor_spatial_ > state_dec_factor_temporal_) { | |
853 action_.spatial = down_action_history_[0].spatial; | |
854 action_.temporal = kNoChangeTemporal; | |
855 } else { | |
856 action_.spatial = kNoChangeSpatial; | |
857 action_.temporal = down_action_history_[0].temporal; | |
858 } | |
859 } | |
860 | |
861 // TODO(marpan): Update when we allow for directional spatial down-sampling. | |
862 void VCMQmResolution::SelectSpatialDirectionMode(float transition_rate) { | |
863 // Default is 4/3x4/3 | |
864 // For bit rates well below transitional rate, we select 2x2. | |
865 if (avg_target_rate_ < transition_rate * kRateRedSpatial2X2) { | |
866 qm_->spatial_width_fact = 2.0f; | |
867 qm_->spatial_height_fact = 2.0f; | |
868 } | |
869 // Otherwise check prediction errors and aspect ratio. | |
870 float spatial_err = 0.0f; | |
871 float spatial_err_h = 0.0f; | |
872 float spatial_err_v = 0.0f; | |
873 if (content_metrics_) { | |
874 spatial_err = content_metrics_->spatial_pred_err; | |
875 spatial_err_h = content_metrics_->spatial_pred_err_h; | |
876 spatial_err_v = content_metrics_->spatial_pred_err_v; | |
877 } | |
878 | |
879 // Favor 1x2 if aspect_ratio is 16:9. | |
880 if (aspect_ratio_ >= 16.0f / 9.0f) { | |
881 // Check if 1x2 has lowest prediction error. | |
882 if (spatial_err_h < spatial_err && spatial_err_h < spatial_err_v) { | |
883 qm_->spatial_width_fact = 2.0f; | |
884 qm_->spatial_height_fact = 1.0f; | |
885 } | |
886 } | |
887 // Check for 4/3x4/3 selection: favor 2x2 over 1x2 and 2x1. | |
888 if (spatial_err < spatial_err_h * (1.0f + kSpatialErr2x2VsHoriz) && | |
889 spatial_err < spatial_err_v * (1.0f + kSpatialErr2X2VsVert)) { | |
890 qm_->spatial_width_fact = 4.0f / 3.0f; | |
891 qm_->spatial_height_fact = 4.0f / 3.0f; | |
892 } | |
893 // Check for 2x1 selection. | |
894 if (spatial_err_v < spatial_err_h * (1.0f - kSpatialErrVertVsHoriz) && | |
895 spatial_err_v < spatial_err * (1.0f - kSpatialErr2X2VsVert)) { | |
896 qm_->spatial_width_fact = 1.0f; | |
897 qm_->spatial_height_fact = 2.0f; | |
898 } | |
899 } | |
900 | |
901 } // namespace webrtc | |
OLD | NEW |