OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" | 11 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" |
12 | 12 |
13 #include <string.h> | 13 #include <string.h> |
14 #include <memory> | |
15 #include <utility> | |
16 #include <vector> | 14 #include <vector> |
17 | 15 |
18 #include "webrtc/base/checks.h" | 16 #include "webrtc/base/checks.h" |
19 #include "webrtc/base/logging.h" | 17 #include "webrtc/base/logging.h" |
20 #include "webrtc/modules/include/module_common_types.h" | 18 #include "webrtc/modules/include/module_common_types.h" |
21 #include "webrtc/modules/rtp_rtcp/source/byte_io.h" | 19 #include "webrtc/modules/rtp_rtcp/source/byte_io.h" |
22 #include "webrtc/common_video/h264/sps_vui_rewriter.h" | 20 #include "webrtc/common_video/h264/sps_vui_rewriter.h" |
23 #include "webrtc/common_video/h264/h264_common.h" | 21 #include "webrtc/common_video/h264/h264_common.h" |
24 #include "webrtc/common_video/h264/pps_parser.h" | |
25 #include "webrtc/common_video/h264/sps_parser.h" | 22 #include "webrtc/common_video/h264/sps_parser.h" |
26 #include "webrtc/system_wrappers/include/metrics.h" | 23 #include "webrtc/system_wrappers/include/metrics.h" |
27 | 24 |
28 namespace webrtc { | 25 namespace webrtc { |
29 namespace { | 26 namespace { |
30 | 27 |
31 static const size_t kNalHeaderSize = 1; | 28 static const size_t kNalHeaderSize = 1; |
32 static const size_t kFuAHeaderSize = 2; | 29 static const size_t kFuAHeaderSize = 2; |
33 static const size_t kLengthFieldSize = 2; | 30 static const size_t kLengthFieldSize = 2; |
34 static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize; | 31 static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize; |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
109 // delay because it allows decode order to differ from render order. | 106 // delay because it allows decode order to differ from render order. |
110 // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain | 107 // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain |
111 // restrictions on the maximum number of reordered pictures. This reduces | 108 // restrictions on the maximum number of reordered pictures. This reduces |
112 // latency significantly, though it still adds about a frame of latency to | 109 // latency significantly, though it still adds about a frame of latency to |
113 // decoding. | 110 // decoding. |
114 // Note that we do this rewriting both here (send side, in order to | 111 // Note that we do this rewriting both here (send side, in order to |
115 // protect legacy receive clients) and below in | 112 // protect legacy receive clients) and below in |
116 // RtpDepacketizerH264::ParseSingleNalu (receive side, in order to | 113 // RtpDepacketizerH264::ParseSingleNalu (receive side, in order to |
117 // protect us from unknown or legacy send clients). | 114 // protect us from unknown or legacy send clients). |
118 | 115 |
 | 116 // Create temporary RBSP decoded buffer of the payload (excluding the |
 | 117 // leading nalu type header byte); the SpsParser uses only the payload. |
| 118 std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp( |
| 119 buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize); |
119 rtc::Optional<SpsParser::SpsState> sps; | 120 rtc::Optional<SpsParser::SpsState> sps; |
120 | 121 |
121 std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer()); | 122 std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer()); |
122 // Add the type header to the output buffer first, so that the rewriter | 123 // Add the type header to the output buffer first, so that the rewriter |
123 // can append modified payload on top of that. | 124 // can append modified payload on top of that. |
124 output_buffer->AppendData(buffer[0]); | 125 output_buffer->AppendData(buffer[0]); |
125 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( | 126 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( |
126 buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize, &sps, | 127 rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); |
127 output_buffer.get()); | |
128 | 128 |
129 switch (result) { | 129 switch (result) { |
130 case SpsVuiRewriter::ParseResult::kVuiRewritten: | 130 case SpsVuiRewriter::ParseResult::kVuiRewritten: |
131 input_fragments_.push_back( | 131 input_fragments_.push_back( |
132 Fragment(output_buffer->data(), output_buffer->size())); | 132 Fragment(output_buffer->data(), output_buffer->size())); |
133 input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer); | 133 input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer); |
134 updated_sps = true; | 134 updated_sps = true; |
135 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, | 135 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, |
136 SpsValidEvent::kSentSpsRewritten, | 136 SpsValidEvent::kSentSpsRewritten, |
137 SpsValidEvent::kSpsRewrittenMax); | 137 SpsValidEvent::kSpsRewrittenMax); |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
335 if (payload_data_length == 0) { | 335 if (payload_data_length == 0) { |
336 LOG(LS_ERROR) << "Empty payload."; | 336 LOG(LS_ERROR) << "Empty payload."; |
337 return false; | 337 return false; |
338 } | 338 } |
339 | 339 |
340 offset_ = 0; | 340 offset_ = 0; |
341 length_ = payload_data_length; | 341 length_ = payload_data_length; |
342 modified_buffer_.reset(); | 342 modified_buffer_.reset(); |
343 | 343 |
344 uint8_t nal_type = payload_data[0] & kTypeMask; | 344 uint8_t nal_type = payload_data[0] & kTypeMask; |
345 parsed_payload->type.Video.codecHeader.H264.nalus_length = 0; | |
346 if (nal_type == H264::NaluType::kFuA) { | 345 if (nal_type == H264::NaluType::kFuA) { |
347 // Fragmented NAL units (FU-A). | 346 // Fragmented NAL units (FU-A). |
348 if (!ParseFuaNalu(parsed_payload, payload_data)) | 347 if (!ParseFuaNalu(parsed_payload, payload_data)) |
349 return false; | 348 return false; |
350 } else { | 349 } else { |
351 // We handle STAP-A and single NALU's the same way here. The jitter buffer | 350 // We handle STAP-A and single NALU's the same way here. The jitter buffer |
352 // will depacketize the STAP-A into NAL units later. | 351 // will depacketize the STAP-A into NAL units later. |
353 // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. | 352 // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. |
354 if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data)) | 353 if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data)) |
355 return false; | 354 return false; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
402 for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { | 401 for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { |
403 size_t start_offset = nalu_start_offsets[i]; | 402 size_t start_offset = nalu_start_offsets[i]; |
404 // End offset is actually start offset for next unit, excluding length field | 403 // End offset is actually start offset for next unit, excluding length field |
405 // so remove that from this unit's length. | 404 // so remove that from this unit's length. |
406 size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize; | 405 size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize; |
407 if (end_offset - start_offset < H264::kNaluTypeSize) { | 406 if (end_offset - start_offset < H264::kNaluTypeSize) { |
408 LOG(LS_ERROR) << "STAP-A packet too short"; | 407 LOG(LS_ERROR) << "STAP-A packet too short"; |
409 return false; | 408 return false; |
410 } | 409 } |
411 | 410 |
412 NaluInfo nalu; | 411 nal_type = payload_data[start_offset] & kTypeMask; |
413 nalu.type = payload_data[start_offset] & kTypeMask; | |
414 nalu.sps_id = -1; | |
415 nalu.pps_id = -1; | |
416 start_offset += H264::kNaluTypeSize; | 412 start_offset += H264::kNaluTypeSize; |
417 | 413 |
418 switch (nalu.type) { | 414 if (nal_type == H264::NaluType::kSps) { |
419 case H264::NaluType::kSps: { | 415 // Check if VUI is present in SPS and if it needs to be modified to avoid |
420 // Check if VUI is present in SPS and if it needs to be modified to | 416 // excessive decoder latency. |
421 // avoid | |
422 // excessive decoder latency. | |
423 | 417 |
424 // Copy any previous data first (likely just the first header). | 418 // Copy any previous data first (likely just the first header). |
425 std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer()); | 419 std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer()); |
426 if (start_offset) | 420 if (start_offset) |
427 output_buffer->AppendData(payload_data, start_offset); | 421 output_buffer->AppendData(payload_data, start_offset); |
428 | 422 |
429 rtc::Optional<SpsParser::SpsState> sps; | 423 // RBSP decode of payload data. |
| 424 std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp( |
| 425 &payload_data[start_offset], end_offset - start_offset); |
| 426 rtc::Optional<SpsParser::SpsState> sps; |
430 | 427 |
431 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( | 428 SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( |
432 &payload_data[start_offset], end_offset - start_offset, &sps, | 429 rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); |
433 output_buffer.get()); | 430 switch (result) { |
434 switch (result) { | 431 case SpsVuiRewriter::ParseResult::kVuiRewritten: |
435 case SpsVuiRewriter::ParseResult::kVuiRewritten: | 432 if (modified_buffer_) { |
436 if (modified_buffer_) { | 433 LOG(LS_WARNING) << "More than one H264 SPS NAL units needing " |
437 LOG(LS_WARNING) | 434 "rewriting found within a single STAP-A packet. " |
438 << "More than one H264 SPS NAL units needing " | 435 "Keeping the first and rewriting the last."; |
439 "rewriting found within a single STAP-A packet. " | 436 } |
440 "Keeping the first and rewriting the last."; | |
441 } | |
442 | 437 |
443 // Rewrite length field to new SPS size. | 438 // Rewrite length field to new SPS size. |
444 if (h264_header->packetization_type == kH264StapA) { | 439 if (h264_header->packetization_type == kH264StapA) { |
445 size_t length_field_offset = | 440 size_t length_field_offset = |
446 start_offset - (H264::kNaluTypeSize + kLengthFieldSize); | 441 start_offset - (H264::kNaluTypeSize + kLengthFieldSize); |
447 // Stap-A Length includes payload data and type header. | 442 // Stap-A Length includes payload data and type header. |
448 size_t rewritten_size = | 443 size_t rewritten_size = |
449 output_buffer->size() - start_offset + H264::kNaluTypeSize; | 444 output_buffer->size() - start_offset + H264::kNaluTypeSize; |
450 ByteWriter<uint16_t>::WriteBigEndian( | 445 ByteWriter<uint16_t>::WriteBigEndian( |
451 &(*output_buffer)[length_field_offset], rewritten_size); | 446 &(*output_buffer)[length_field_offset], rewritten_size); |
452 } | 447 } |
453 | 448 |
454 // Append rest of packet. | 449 // Append rest of packet. |
455 output_buffer->AppendData( | 450 output_buffer->AppendData(&payload_data[end_offset], |
456 &payload_data[end_offset], | 451 nalu_length + kNalHeaderSize - end_offset); |
457 nalu_length + kNalHeaderSize - end_offset); | |
458 | 452 |
459 modified_buffer_ = std::move(output_buffer); | 453 modified_buffer_ = std::move(output_buffer); |
460 length_ = modified_buffer_->size(); | 454 length_ = modified_buffer_->size(); |
461 | 455 |
462 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, | 456 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, |
463 SpsValidEvent::kReceivedSpsRewritten, | 457 SpsValidEvent::kReceivedSpsRewritten, |
464 SpsValidEvent::kSpsRewrittenMax); | 458 SpsValidEvent::kSpsRewrittenMax); |
465 break; | 459 break; |
466 case SpsVuiRewriter::ParseResult::kPocOk: | 460 case SpsVuiRewriter::ParseResult::kPocOk: |
467 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, | 461 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, |
468 SpsValidEvent::kReceivedSpsPocOk, | 462 SpsValidEvent::kReceivedSpsPocOk, |
469 SpsValidEvent::kSpsRewrittenMax); | 463 SpsValidEvent::kSpsRewrittenMax); |
470 break; | 464 break; |
471 case SpsVuiRewriter::ParseResult::kVuiOk: | 465 case SpsVuiRewriter::ParseResult::kVuiOk: |
472 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, | 466 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, |
473 SpsValidEvent::kReceivedSpsVuiOk, | 467 SpsValidEvent::kReceivedSpsVuiOk, |
474 SpsValidEvent::kSpsRewrittenMax); | 468 SpsValidEvent::kSpsRewrittenMax); |
475 break; | 469 break; |
476 case SpsVuiRewriter::ParseResult::kFailure: | 470 case SpsVuiRewriter::ParseResult::kFailure: |
477 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, | 471 RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, |
478 SpsValidEvent::kReceivedSpsParseFailure, | 472 SpsValidEvent::kReceivedSpsParseFailure, |
479 SpsValidEvent::kSpsRewrittenMax); | 473 SpsValidEvent::kSpsRewrittenMax); |
480 break; | 474 break; |
481 } | 475 } |
482 | 476 |
483 if (sps) { | 477 if (sps) { |
484 parsed_payload->type.Video.width = sps->width; | 478 parsed_payload->type.Video.width = sps->width; |
485 parsed_payload->type.Video.height = sps->height; | 479 parsed_payload->type.Video.height = sps->height; |
486 nalu.sps_id = sps->id; | |
487 } | |
488 parsed_payload->frame_type = kVideoFrameKey; | |
489 break; | |
490 } | 480 } |
491 case H264::NaluType::kPps: { | 481 parsed_payload->frame_type = kVideoFrameKey; |
492 rtc::Optional<PpsParser::PpsState> pps = PpsParser::ParsePps( | 482 } else if (nal_type == H264::NaluType::kPps || |
493 &payload_data[start_offset], end_offset - start_offset); | 483 nal_type == H264::NaluType::kSei || |
494 if (pps) { | 484 nal_type == H264::NaluType::kIdr) { |
495 nalu.sps_id = pps->sps_id; | 485 parsed_payload->frame_type = kVideoFrameKey; |
496 nalu.pps_id = pps->id; | |
497 } | |
498 break; | |
499 } | |
500 case H264::NaluType::kSei: | |
501 FALLTHROUGH(); | |
502 case H264::NaluType::kIdr: | |
503 parsed_payload->frame_type = kVideoFrameKey; | |
504 FALLTHROUGH(); | |
505 default: { | |
506 rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice( | |
507 &payload_data[start_offset], end_offset - start_offset); | |
508 if (pps_id) | |
509 nalu.pps_id = *pps_id; | |
510 break; | |
511 } | |
512 } | |
513 RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; | |
514 if (h264->nalus_length == kMaxNalusPerPacket) { | |
515 LOG(LS_WARNING) | |
516 << "Received packet containing more than " << kMaxNalusPerPacket | |
517 << " NAL units. Will not keep track sps and pps ids for all of them."; | |
518 } else { | |
519 h264->nalus[h264->nalus_length++] = nalu; | |
520 } | 486 } |
521 } | 487 } |
522 | 488 |
523 return true; | 489 return true; |
524 } | 490 } |
525 | 491 |
526 bool RtpDepacketizerH264::ParseFuaNalu( | 492 bool RtpDepacketizerH264::ParseFuaNalu( |
527 RtpDepacketizer::ParsedPayload* parsed_payload, | 493 RtpDepacketizer::ParsedPayload* parsed_payload, |
528 const uint8_t* payload_data) { | 494 const uint8_t* payload_data) { |
529 if (length_ < kFuAHeaderSize) { | 495 if (length_ < kFuAHeaderSize) { |
530 LOG(LS_ERROR) << "FU-A NAL units truncated."; | 496 LOG(LS_ERROR) << "FU-A NAL units truncated."; |
531 return false; | 497 return false; |
532 } | 498 } |
533 uint8_t fnri = payload_data[0] & (kFBit | kNriMask); | 499 uint8_t fnri = payload_data[0] & (kFBit | kNriMask); |
534 uint8_t original_nal_type = payload_data[1] & kTypeMask; | 500 uint8_t original_nal_type = payload_data[1] & kTypeMask; |
535 bool first_fragment = (payload_data[1] & kSBit) > 0; | 501 bool first_fragment = (payload_data[1] & kSBit) > 0; |
536 NaluInfo nalu; | 502 |
537 nalu.type = original_nal_type; | |
538 nalu.sps_id = -1; | |
539 nalu.pps_id = -1; | |
540 if (first_fragment) { | 503 if (first_fragment) { |
541 offset_ = 0; | 504 offset_ = 0; |
542 length_ -= kNalHeaderSize; | 505 length_ -= kNalHeaderSize; |
543 rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice( | |
544 payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize); | |
545 if (pps_id) | |
546 nalu.pps_id = *pps_id; | |
547 uint8_t original_nal_header = fnri | original_nal_type; | 506 uint8_t original_nal_header = fnri | original_nal_type; |
548 modified_buffer_.reset(new rtc::Buffer()); | 507 modified_buffer_.reset(new rtc::Buffer()); |
549 modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); | 508 modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); |
550 (*modified_buffer_)[0] = original_nal_header; | 509 (*modified_buffer_)[0] = original_nal_header; |
551 } else { | 510 } else { |
552 offset_ = kFuAHeaderSize; | 511 offset_ = kFuAHeaderSize; |
553 length_ -= kFuAHeaderSize; | 512 length_ -= kFuAHeaderSize; |
554 } | 513 } |
555 | 514 |
556 if (original_nal_type == H264::NaluType::kIdr) { | 515 if (original_nal_type == H264::NaluType::kIdr) { |
557 parsed_payload->frame_type = kVideoFrameKey; | 516 parsed_payload->frame_type = kVideoFrameKey; |
558 } else { | 517 } else { |
559 parsed_payload->frame_type = kVideoFrameDelta; | 518 parsed_payload->frame_type = kVideoFrameDelta; |
560 } | 519 } |
561 parsed_payload->type.Video.width = 0; | 520 parsed_payload->type.Video.width = 0; |
562 parsed_payload->type.Video.height = 0; | 521 parsed_payload->type.Video.height = 0; |
563 parsed_payload->type.Video.codec = kRtpVideoH264; | 522 parsed_payload->type.Video.codec = kRtpVideoH264; |
564 parsed_payload->type.Video.isFirstPacket = first_fragment; | 523 parsed_payload->type.Video.isFirstPacket = first_fragment; |
565 RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; | 524 RTPVideoHeaderH264* h264_header = |
566 h264->packetization_type = kH264FuA; | 525 &parsed_payload->type.Video.codecHeader.H264; |
567 h264->nalu_type = original_nal_type; | 526 h264_header->packetization_type = kH264FuA; |
568 h264->nalus[h264->nalus_length] = nalu; | 527 h264_header->nalu_type = original_nal_type; |
569 h264->nalus_length = 1; | |
570 return true; | 528 return true; |
571 } | 529 } |
572 | 530 |
573 } // namespace webrtc | 531 } // namespace webrtc |
OLD | NEW |