OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
473 * samples_per_channel_ * num_channels_ | 473 * samples_per_channel_ * num_channels_ |
474 * | 474 * |
475 * - Stereo data is interleaved starting with the left channel. | 475 * - Stereo data is interleaved starting with the left channel. |
476 * | 476 * |
477 * - The +operator assumes that you would never add exactly opposite frames when | 477 * - The +operator assumes that you would never add exactly opposite frames when |
478 * deciding the resulting state. To do this, use the -operator. | 478 * deciding the resulting state. To do this, use the -operator. |
479 */ | 479 */ |
480 class AudioFrame { | 480 class AudioFrame { |
481 public: | 481 public: |
482 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) | 482 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) |
483 static const int kMaxDataSizeSamples = 3840; | 483 static const size_t kMaxDataSizeSamples = 3840; |
484 | 484 |
485 enum VADActivity { | 485 enum VADActivity { |
486 kVadActive = 0, | 486 kVadActive = 0, |
487 kVadPassive = 1, | 487 kVadPassive = 1, |
488 kVadUnknown = 2 | 488 kVadUnknown = 2 |
489 }; | 489 }; |
490 enum SpeechType { | 490 enum SpeechType { |
491 kNormalSpeech = 0, | 491 kNormalSpeech = 0, |
492 kPLC = 1, | 492 kPLC = 1, |
493 kCNG = 2, | 493 kCNG = 2, |
494 kPLCCNG = 3, | 494 kPLCCNG = 3, |
495 kUndefined = 4 | 495 kUndefined = 4 |
496 }; | 496 }; |
497 | 497 |
498 AudioFrame(); | 498 AudioFrame(); |
499 virtual ~AudioFrame() {} | 499 virtual ~AudioFrame() {} |
500 | 500 |
501 // Resets all members to their default state (except does not modify the | 501 // Resets all members to their default state (except does not modify the |
502 // contents of |data_|). | 502 // contents of |data_|). |
503 void Reset(); | 503 void Reset(); |
504 | 504 |
505 // |interleaved_| is not changed by this method. | 505 // |interleaved_| is not changed by this method. |
506 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, | 506 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, |
507 int samples_per_channel, int sample_rate_hz, | 507 size_t samples_per_channel, int sample_rate_hz, |
508 SpeechType speech_type, VADActivity vad_activity, | 508 SpeechType speech_type, VADActivity vad_activity, |
509 int num_channels = 1, uint32_t energy = -1); | 509 int num_channels = 1, uint32_t energy = -1); |
510 | 510 |
511 AudioFrame& Append(const AudioFrame& rhs); | 511 AudioFrame& Append(const AudioFrame& rhs); |
512 | 512 |
513 void CopyFrom(const AudioFrame& src); | 513 void CopyFrom(const AudioFrame& src); |
514 | 514 |
515 void Mute(); | 515 void Mute(); |
516 | 516 |
517 AudioFrame& operator>>=(const int rhs); | 517 AudioFrame& operator>>=(const int rhs); |
518 AudioFrame& operator+=(const AudioFrame& rhs); | 518 AudioFrame& operator+=(const AudioFrame& rhs); |
519 AudioFrame& operator-=(const AudioFrame& rhs); | 519 AudioFrame& operator-=(const AudioFrame& rhs); |
520 | 520 |
521 int id_; | 521 int id_; |
522 // RTP timestamp of the first sample in the AudioFrame. | 522 // RTP timestamp of the first sample in the AudioFrame. |
523 uint32_t timestamp_; | 523 uint32_t timestamp_; |
524 // Time since the first frame in milliseconds. | 524 // Time since the first frame in milliseconds. |
525 // -1 represents an uninitialized value. | 525 // -1 represents an uninitialized value. |
526 int64_t elapsed_time_ms_; | 526 int64_t elapsed_time_ms_; |
527 // NTP time of the estimated capture time in local timebase in milliseconds. | 527 // NTP time of the estimated capture time in local timebase in milliseconds. |
528 // -1 represents an uninitialized value. | 528 // -1 represents an uninitialized value. |
529 int64_t ntp_time_ms_; | 529 int64_t ntp_time_ms_; |
530 int16_t data_[kMaxDataSizeSamples]; | 530 int16_t data_[kMaxDataSizeSamples]; |
531 int samples_per_channel_; | 531 size_t samples_per_channel_; |
532 int sample_rate_hz_; | 532 int sample_rate_hz_; |
533 int num_channels_; | 533 int num_channels_; |
534 SpeechType speech_type_; | 534 SpeechType speech_type_; |
535 VADActivity vad_activity_; | 535 VADActivity vad_activity_; |
536 // Note that there is no guarantee that |energy_| is correct. Any user of this | 536 // Note that there is no guarantee that |energy_| is correct. Any user of this |
537 // member must verify that the value is correct. | 537 // member must verify that the value is correct. |
538 // TODO(henrike) Remove |energy_|. | 538 // TODO(henrike) Remove |energy_|. |
539 // See https://code.google.com/p/webrtc/issues/detail?id=3315. | 539 // See https://code.google.com/p/webrtc/issues/detail?id=3315. |
540 uint32_t energy_; | 540 uint32_t energy_; |
541 bool interleaved_; | 541 bool interleaved_; |
(...skipping 19 matching lines...) Expand all Loading... |
561 num_channels_ = 0; | 561 num_channels_ = 0; |
562 speech_type_ = kUndefined; | 562 speech_type_ = kUndefined; |
563 vad_activity_ = kVadUnknown; | 563 vad_activity_ = kVadUnknown; |
564 energy_ = 0xffffffff; | 564 energy_ = 0xffffffff; |
565 interleaved_ = true; | 565 interleaved_ = true; |
566 } | 566 } |
567 | 567 |
568 inline void AudioFrame::UpdateFrame(int id, | 568 inline void AudioFrame::UpdateFrame(int id, |
569 uint32_t timestamp, | 569 uint32_t timestamp, |
570 const int16_t* data, | 570 const int16_t* data, |
571 int samples_per_channel, | 571 size_t samples_per_channel, |
572 int sample_rate_hz, | 572 int sample_rate_hz, |
573 SpeechType speech_type, | 573 SpeechType speech_type, |
574 VADActivity vad_activity, | 574 VADActivity vad_activity, |
575 int num_channels, | 575 int num_channels, |
576 uint32_t energy) { | 576 uint32_t energy) { |
577 id_ = id; | 577 id_ = id; |
578 timestamp_ = timestamp; | 578 timestamp_ = timestamp; |
579 samples_per_channel_ = samples_per_channel; | 579 samples_per_channel_ = samples_per_channel; |
580 sample_rate_hz_ = sample_rate_hz; | 580 sample_rate_hz_ = sample_rate_hz; |
581 speech_type_ = speech_type; | 581 speech_type_ = speech_type; |
582 vad_activity_ = vad_activity; | 582 vad_activity_ = vad_activity; |
583 num_channels_ = num_channels; | 583 num_channels_ = num_channels; |
584 energy_ = energy; | 584 energy_ = energy; |
585 | 585 |
586 assert(num_channels >= 0); | 586 assert(num_channels >= 0); |
587 const int length = samples_per_channel * num_channels; | 587 const size_t length = samples_per_channel * num_channels; |
588 assert(length <= kMaxDataSizeSamples); | 588 assert(length <= kMaxDataSizeSamples); |
589 if (data != NULL) { | 589 if (data != NULL) { |
590 memcpy(data_, data, sizeof(int16_t) * length); | 590 memcpy(data_, data, sizeof(int16_t) * length); |
591 } else { | 591 } else { |
592 memset(data_, 0, sizeof(int16_t) * length); | 592 memset(data_, 0, sizeof(int16_t) * length); |
593 } | 593 } |
594 } | 594 } |
595 | 595 |
596 inline void AudioFrame::CopyFrom(const AudioFrame& src) { | 596 inline void AudioFrame::CopyFrom(const AudioFrame& src) { |
597 if (this == &src) return; | 597 if (this == &src) return; |
598 | 598 |
599 id_ = src.id_; | 599 id_ = src.id_; |
600 timestamp_ = src.timestamp_; | 600 timestamp_ = src.timestamp_; |
601 elapsed_time_ms_ = src.elapsed_time_ms_; | 601 elapsed_time_ms_ = src.elapsed_time_ms_; |
602 ntp_time_ms_ = src.ntp_time_ms_; | 602 ntp_time_ms_ = src.ntp_time_ms_; |
603 samples_per_channel_ = src.samples_per_channel_; | 603 samples_per_channel_ = src.samples_per_channel_; |
604 sample_rate_hz_ = src.sample_rate_hz_; | 604 sample_rate_hz_ = src.sample_rate_hz_; |
605 speech_type_ = src.speech_type_; | 605 speech_type_ = src.speech_type_; |
606 vad_activity_ = src.vad_activity_; | 606 vad_activity_ = src.vad_activity_; |
607 num_channels_ = src.num_channels_; | 607 num_channels_ = src.num_channels_; |
608 energy_ = src.energy_; | 608 energy_ = src.energy_; |
609 interleaved_ = src.interleaved_; | 609 interleaved_ = src.interleaved_; |
610 | 610 |
611 assert(num_channels_ >= 0); | 611 assert(num_channels_ >= 0); |
612 const int length = samples_per_channel_ * num_channels_; | 612 const size_t length = samples_per_channel_ * num_channels_; |
613 assert(length <= kMaxDataSizeSamples); | 613 assert(length <= kMaxDataSizeSamples); |
614 memcpy(data_, src.data_, sizeof(int16_t) * length); | 614 memcpy(data_, src.data_, sizeof(int16_t) * length); |
615 } | 615 } |
616 | 616 |
617 inline void AudioFrame::Mute() { | 617 inline void AudioFrame::Mute() { |
618 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); | 618 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); |
619 } | 619 } |
620 | 620 |
621 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { | 621 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { |
622 assert((num_channels_ > 0) && (num_channels_ < 3)); | 622 assert((num_channels_ > 0) && (num_channels_ < 3)); |
623 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 623 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
624 | 624 |
625 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 625 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
626 data_[i] = static_cast<int16_t>(data_[i] >> rhs); | 626 data_[i] = static_cast<int16_t>(data_[i] >> rhs); |
627 } | 627 } |
628 return *this; | 628 return *this; |
629 } | 629 } |
630 | 630 |
631 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { | 631 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { |
632 // Sanity check | 632 // Sanity check |
633 assert((num_channels_ > 0) && (num_channels_ < 3)); | 633 assert((num_channels_ > 0) && (num_channels_ < 3)); |
634 assert(interleaved_ == rhs.interleaved_); | 634 assert(interleaved_ == rhs.interleaved_); |
635 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 635 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
636 if (num_channels_ != rhs.num_channels_) return *this; | 636 if (num_channels_ != rhs.num_channels_) return *this; |
637 | 637 |
638 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { | 638 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { |
639 vad_activity_ = kVadActive; | 639 vad_activity_ = kVadActive; |
640 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { | 640 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { |
641 vad_activity_ = kVadUnknown; | 641 vad_activity_ = kVadUnknown; |
642 } | 642 } |
643 if (speech_type_ != rhs.speech_type_) { | 643 if (speech_type_ != rhs.speech_type_) { |
644 speech_type_ = kUndefined; | 644 speech_type_ = kUndefined; |
645 } | 645 } |
646 | 646 |
647 int offset = samples_per_channel_ * num_channels_; | 647 size_t offset = samples_per_channel_ * num_channels_; |
648 for (int i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { | 648 for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { |
649 data_[offset + i] = rhs.data_[i]; | 649 data_[offset + i] = rhs.data_[i]; |
650 } | 650 } |
651 samples_per_channel_ += rhs.samples_per_channel_; | 651 samples_per_channel_ += rhs.samples_per_channel_; |
652 return *this; | 652 return *this; |
653 } | 653 } |
654 | 654 |
655 namespace { | 655 namespace { |
656 inline int16_t ClampToInt16(int32_t input) { | 656 inline int16_t ClampToInt16(int32_t input) { |
657 if (input < -0x00008000) { | 657 if (input < -0x00008000) { |
658 return -0x8000; | 658 return -0x8000; |
(...skipping 29 matching lines...) Expand all Loading... |
688 vad_activity_ = kVadUnknown; | 688 vad_activity_ = kVadUnknown; |
689 } | 689 } |
690 | 690 |
691 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; | 691 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; |
692 | 692 |
693 if (noPrevData) { | 693 if (noPrevData) { |
694 memcpy(data_, rhs.data_, | 694 memcpy(data_, rhs.data_, |
695 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); | 695 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); |
696 } else { | 696 } else { |
697 // IMPROVEMENT this can be done very fast in assembly | 697 // IMPROVEMENT this can be done very fast in assembly |
698 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 698 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
699 int32_t wrap_guard = | 699 int32_t wrap_guard = |
700 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); | 700 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); |
701 data_[i] = ClampToInt16(wrap_guard); | 701 data_[i] = ClampToInt16(wrap_guard); |
702 } | 702 } |
703 } | 703 } |
704 energy_ = 0xffffffff; | 704 energy_ = 0xffffffff; |
705 return *this; | 705 return *this; |
706 } | 706 } |
707 | 707 |
708 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { | 708 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { |
709 // Sanity check | 709 // Sanity check |
710 assert((num_channels_ > 0) && (num_channels_ < 3)); | 710 assert((num_channels_ > 0) && (num_channels_ < 3)); |
711 assert(interleaved_ == rhs.interleaved_); | 711 assert(interleaved_ == rhs.interleaved_); |
712 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 712 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
713 | 713 |
714 if ((samples_per_channel_ != rhs.samples_per_channel_) || | 714 if ((samples_per_channel_ != rhs.samples_per_channel_) || |
715 (num_channels_ != rhs.num_channels_)) { | 715 (num_channels_ != rhs.num_channels_)) { |
716 return *this; | 716 return *this; |
717 } | 717 } |
718 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { | 718 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { |
719 vad_activity_ = kVadUnknown; | 719 vad_activity_ = kVadUnknown; |
720 } | 720 } |
721 speech_type_ = kUndefined; | 721 speech_type_ = kUndefined; |
722 | 722 |
723 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 723 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
724 int32_t wrap_guard = | 724 int32_t wrap_guard = |
725 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); | 725 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); |
726 data_[i] = ClampToInt16(wrap_guard); | 726 data_[i] = ClampToInt16(wrap_guard); |
727 } | 727 } |
728 energy_ = 0xffffffff; | 728 energy_ = 0xffffffff; |
729 return *this; | 729 return *this; |
730 } | 730 } |
731 | 731 |
732 inline bool IsNewerSequenceNumber(uint16_t sequence_number, | 732 inline bool IsNewerSequenceNumber(uint16_t sequence_number, |
733 uint16_t prev_sequence_number) { | 733 uint16_t prev_sequence_number) { |
(...skipping 26 matching lines...) Expand all Loading... |
760 : sequence_number2; | 760 : sequence_number2; |
761 } | 761 } |
762 | 762 |
763 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { | 763 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { |
764 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; | 764 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; |
765 } | 765 } |
766 | 766 |
767 } // namespace webrtc | 767 } // namespace webrtc |
768 | 768 |
769 #endif // MODULE_COMMON_TYPES_H | 769 #endif // MODULE_COMMON_TYPES_H |
OLD | NEW |