OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
335 * samples_per_channel_ * num_channels_ | 335 * samples_per_channel_ * num_channels_ |
336 * | 336 * |
337 * - Stereo data is interleaved starting with the left channel. | 337 * - Stereo data is interleaved starting with the left channel. |
338 * | 338 * |
339 * - The +operator assumes that you would never add exactly opposite frames when | 339 * - The +operator assumes that you would never add exactly opposite frames when |
340 * deciding the resulting state. To do this use the -operator. | 340 * deciding the resulting state. To do this use the -operator. |
341 */ | 341 */ |
342 class AudioFrame { | 342 class AudioFrame { |
343 public: | 343 public: |
344 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) | 344 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) |
345 static const int kMaxDataSizeSamples = 3840; | 345 static const size_t kMaxDataSizeSamples = 3840; |
346 | 346 |
347 enum VADActivity { | 347 enum VADActivity { |
348 kVadActive = 0, | 348 kVadActive = 0, |
349 kVadPassive = 1, | 349 kVadPassive = 1, |
350 kVadUnknown = 2 | 350 kVadUnknown = 2 |
351 }; | 351 }; |
352 enum SpeechType { | 352 enum SpeechType { |
353 kNormalSpeech = 0, | 353 kNormalSpeech = 0, |
354 kPLC = 1, | 354 kPLC = 1, |
355 kCNG = 2, | 355 kCNG = 2, |
356 kPLCCNG = 3, | 356 kPLCCNG = 3, |
357 kUndefined = 4 | 357 kUndefined = 4 |
358 }; | 358 }; |
359 | 359 |
360 AudioFrame(); | 360 AudioFrame(); |
361 virtual ~AudioFrame() {} | 361 virtual ~AudioFrame() {} |
362 | 362 |
363 // Resets all members to their default state (except does not modify the | 363 // Resets all members to their default state (except does not modify the |
364 // contents of |data_|). | 364 // contents of |data_|). |
365 void Reset(); | 365 void Reset(); |
366 | 366 |
367 // |interleaved_| is not changed by this method. | 367 // |interleaved_| is not changed by this method. |
368 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, | 368 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, |
369 int samples_per_channel, int sample_rate_hz, | 369 size_t samples_per_channel, int sample_rate_hz, |
370 SpeechType speech_type, VADActivity vad_activity, | 370 SpeechType speech_type, VADActivity vad_activity, |
371 int num_channels = 1, uint32_t energy = -1); | 371 int num_channels = 1, uint32_t energy = -1); |
372 | 372 |
373 AudioFrame& Append(const AudioFrame& rhs); | 373 AudioFrame& Append(const AudioFrame& rhs); |
374 | 374 |
375 void CopyFrom(const AudioFrame& src); | 375 void CopyFrom(const AudioFrame& src); |
376 | 376 |
377 void Mute(); | 377 void Mute(); |
378 | 378 |
379 AudioFrame& operator>>=(const int rhs); | 379 AudioFrame& operator>>=(const int rhs); |
380 AudioFrame& operator+=(const AudioFrame& rhs); | 380 AudioFrame& operator+=(const AudioFrame& rhs); |
381 AudioFrame& operator-=(const AudioFrame& rhs); | 381 AudioFrame& operator-=(const AudioFrame& rhs); |
382 | 382 |
383 int id_; | 383 int id_; |
384 // RTP timestamp of the first sample in the AudioFrame. | 384 // RTP timestamp of the first sample in the AudioFrame. |
385 uint32_t timestamp_; | 385 uint32_t timestamp_; |
386 // Time since the first frame in milliseconds. | 386 // Time since the first frame in milliseconds. |
387 // -1 represents an uninitialized value. | 387 // -1 represents an uninitialized value. |
388 int64_t elapsed_time_ms_; | 388 int64_t elapsed_time_ms_; |
389 // NTP time of the estimated capture time in local timebase in milliseconds. | 389 // NTP time of the estimated capture time in local timebase in milliseconds. |
390 // -1 represents an uninitialized value. | 390 // -1 represents an uninitialized value. |
391 int64_t ntp_time_ms_; | 391 int64_t ntp_time_ms_; |
392 int16_t data_[kMaxDataSizeSamples]; | 392 int16_t data_[kMaxDataSizeSamples]; |
393 int samples_per_channel_; | 393 size_t samples_per_channel_; |
394 int sample_rate_hz_; | 394 int sample_rate_hz_; |
395 int num_channels_; | 395 int num_channels_; |
396 SpeechType speech_type_; | 396 SpeechType speech_type_; |
397 VADActivity vad_activity_; | 397 VADActivity vad_activity_; |
398 // Note that there is no guarantee that |energy_| is correct. Any user of this | 398 // Note that there is no guarantee that |energy_| is correct. Any user of this |
399 // member must verify that the value is correct. | 399 // member must verify that the value is correct. |
400 // TODO(henrike) Remove |energy_|. | 400 // TODO(henrike) Remove |energy_|. |
401 // See https://code.google.com/p/webrtc/issues/detail?id=3315. | 401 // See https://code.google.com/p/webrtc/issues/detail?id=3315. |
402 uint32_t energy_; | 402 uint32_t energy_; |
403 bool interleaved_; | 403 bool interleaved_; |
(...skipping 19 matching lines...) Expand all Loading... |
423 num_channels_ = 0; | 423 num_channels_ = 0; |
424 speech_type_ = kUndefined; | 424 speech_type_ = kUndefined; |
425 vad_activity_ = kVadUnknown; | 425 vad_activity_ = kVadUnknown; |
426 energy_ = 0xffffffff; | 426 energy_ = 0xffffffff; |
427 interleaved_ = true; | 427 interleaved_ = true; |
428 } | 428 } |
429 | 429 |
430 inline void AudioFrame::UpdateFrame(int id, | 430 inline void AudioFrame::UpdateFrame(int id, |
431 uint32_t timestamp, | 431 uint32_t timestamp, |
432 const int16_t* data, | 432 const int16_t* data, |
433 int samples_per_channel, | 433 size_t samples_per_channel, |
434 int sample_rate_hz, | 434 int sample_rate_hz, |
435 SpeechType speech_type, | 435 SpeechType speech_type, |
436 VADActivity vad_activity, | 436 VADActivity vad_activity, |
437 int num_channels, | 437 int num_channels, |
438 uint32_t energy) { | 438 uint32_t energy) { |
439 id_ = id; | 439 id_ = id; |
440 timestamp_ = timestamp; | 440 timestamp_ = timestamp; |
441 samples_per_channel_ = samples_per_channel; | 441 samples_per_channel_ = samples_per_channel; |
442 sample_rate_hz_ = sample_rate_hz; | 442 sample_rate_hz_ = sample_rate_hz; |
443 speech_type_ = speech_type; | 443 speech_type_ = speech_type; |
444 vad_activity_ = vad_activity; | 444 vad_activity_ = vad_activity; |
445 num_channels_ = num_channels; | 445 num_channels_ = num_channels; |
446 energy_ = energy; | 446 energy_ = energy; |
447 | 447 |
448 assert(num_channels >= 0); | 448 assert(num_channels >= 0); |
449 const int length = samples_per_channel * num_channels; | 449 const size_t length = samples_per_channel * num_channels; |
450 assert(length <= kMaxDataSizeSamples); | 450 assert(length <= kMaxDataSizeSamples); |
451 if (data != NULL) { | 451 if (data != NULL) { |
452 memcpy(data_, data, sizeof(int16_t) * length); | 452 memcpy(data_, data, sizeof(int16_t) * length); |
453 } else { | 453 } else { |
454 memset(data_, 0, sizeof(int16_t) * length); | 454 memset(data_, 0, sizeof(int16_t) * length); |
455 } | 455 } |
456 } | 456 } |
457 | 457 |
458 inline void AudioFrame::CopyFrom(const AudioFrame& src) { | 458 inline void AudioFrame::CopyFrom(const AudioFrame& src) { |
459 if (this == &src) return; | 459 if (this == &src) return; |
460 | 460 |
461 id_ = src.id_; | 461 id_ = src.id_; |
462 timestamp_ = src.timestamp_; | 462 timestamp_ = src.timestamp_; |
463 elapsed_time_ms_ = src.elapsed_time_ms_; | 463 elapsed_time_ms_ = src.elapsed_time_ms_; |
464 ntp_time_ms_ = src.ntp_time_ms_; | 464 ntp_time_ms_ = src.ntp_time_ms_; |
465 samples_per_channel_ = src.samples_per_channel_; | 465 samples_per_channel_ = src.samples_per_channel_; |
466 sample_rate_hz_ = src.sample_rate_hz_; | 466 sample_rate_hz_ = src.sample_rate_hz_; |
467 speech_type_ = src.speech_type_; | 467 speech_type_ = src.speech_type_; |
468 vad_activity_ = src.vad_activity_; | 468 vad_activity_ = src.vad_activity_; |
469 num_channels_ = src.num_channels_; | 469 num_channels_ = src.num_channels_; |
470 energy_ = src.energy_; | 470 energy_ = src.energy_; |
471 interleaved_ = src.interleaved_; | 471 interleaved_ = src.interleaved_; |
472 | 472 |
473 assert(num_channels_ >= 0); | 473 assert(num_channels_ >= 0); |
474 const int length = samples_per_channel_ * num_channels_; | 474 const size_t length = samples_per_channel_ * num_channels_; |
475 assert(length <= kMaxDataSizeSamples); | 475 assert(length <= kMaxDataSizeSamples); |
476 memcpy(data_, src.data_, sizeof(int16_t) * length); | 476 memcpy(data_, src.data_, sizeof(int16_t) * length); |
477 } | 477 } |
478 | 478 |
479 inline void AudioFrame::Mute() { | 479 inline void AudioFrame::Mute() { |
480 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); | 480 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); |
481 } | 481 } |
482 | 482 |
483 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { | 483 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { |
484 assert((num_channels_ > 0) && (num_channels_ < 3)); | 484 assert((num_channels_ > 0) && (num_channels_ < 3)); |
485 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 485 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
486 | 486 |
487 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 487 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
488 data_[i] = static_cast<int16_t>(data_[i] >> rhs); | 488 data_[i] = static_cast<int16_t>(data_[i] >> rhs); |
489 } | 489 } |
490 return *this; | 490 return *this; |
491 } | 491 } |
492 | 492 |
493 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { | 493 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { |
494 // Sanity check | 494 // Sanity check |
495 assert((num_channels_ > 0) && (num_channels_ < 3)); | 495 assert((num_channels_ > 0) && (num_channels_ < 3)); |
496 assert(interleaved_ == rhs.interleaved_); | 496 assert(interleaved_ == rhs.interleaved_); |
497 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 497 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
498 if (num_channels_ != rhs.num_channels_) return *this; | 498 if (num_channels_ != rhs.num_channels_) return *this; |
499 | 499 |
500 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { | 500 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { |
501 vad_activity_ = kVadActive; | 501 vad_activity_ = kVadActive; |
502 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { | 502 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { |
503 vad_activity_ = kVadUnknown; | 503 vad_activity_ = kVadUnknown; |
504 } | 504 } |
505 if (speech_type_ != rhs.speech_type_) { | 505 if (speech_type_ != rhs.speech_type_) { |
506 speech_type_ = kUndefined; | 506 speech_type_ = kUndefined; |
507 } | 507 } |
508 | 508 |
509 int offset = samples_per_channel_ * num_channels_; | 509 size_t offset = samples_per_channel_ * num_channels_; |
510 for (int i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { | 510 for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { |
511 data_[offset + i] = rhs.data_[i]; | 511 data_[offset + i] = rhs.data_[i]; |
512 } | 512 } |
513 samples_per_channel_ += rhs.samples_per_channel_; | 513 samples_per_channel_ += rhs.samples_per_channel_; |
514 return *this; | 514 return *this; |
515 } | 515 } |
516 | 516 |
517 namespace { | 517 namespace { |
518 inline int16_t ClampToInt16(int32_t input) { | 518 inline int16_t ClampToInt16(int32_t input) { |
519 if (input < -0x00008000) { | 519 if (input < -0x00008000) { |
520 return -0x8000; | 520 return -0x8000; |
(...skipping 29 matching lines...) Expand all Loading... |
550 vad_activity_ = kVadUnknown; | 550 vad_activity_ = kVadUnknown; |
551 } | 551 } |
552 | 552 |
553 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; | 553 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; |
554 | 554 |
555 if (noPrevData) { | 555 if (noPrevData) { |
556 memcpy(data_, rhs.data_, | 556 memcpy(data_, rhs.data_, |
557 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); | 557 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); |
558 } else { | 558 } else { |
559 // IMPROVEMENT this can be done very fast in assembly | 559 // IMPROVEMENT this can be done very fast in assembly |
560 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 560 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
561 int32_t wrap_guard = | 561 int32_t wrap_guard = |
562 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); | 562 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); |
563 data_[i] = ClampToInt16(wrap_guard); | 563 data_[i] = ClampToInt16(wrap_guard); |
564 } | 564 } |
565 } | 565 } |
566 energy_ = 0xffffffff; | 566 energy_ = 0xffffffff; |
567 return *this; | 567 return *this; |
568 } | 568 } |
569 | 569 |
570 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { | 570 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { |
571 // Sanity check | 571 // Sanity check |
572 assert((num_channels_ > 0) && (num_channels_ < 3)); | 572 assert((num_channels_ > 0) && (num_channels_ < 3)); |
573 assert(interleaved_ == rhs.interleaved_); | 573 assert(interleaved_ == rhs.interleaved_); |
574 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 574 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
575 | 575 |
576 if ((samples_per_channel_ != rhs.samples_per_channel_) || | 576 if ((samples_per_channel_ != rhs.samples_per_channel_) || |
577 (num_channels_ != rhs.num_channels_)) { | 577 (num_channels_ != rhs.num_channels_)) { |
578 return *this; | 578 return *this; |
579 } | 579 } |
580 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { | 580 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { |
581 vad_activity_ = kVadUnknown; | 581 vad_activity_ = kVadUnknown; |
582 } | 582 } |
583 speech_type_ = kUndefined; | 583 speech_type_ = kUndefined; |
584 | 584 |
585 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 585 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
586 int32_t wrap_guard = | 586 int32_t wrap_guard = |
587 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); | 587 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); |
588 data_[i] = ClampToInt16(wrap_guard); | 588 data_[i] = ClampToInt16(wrap_guard); |
589 } | 589 } |
590 energy_ = 0xffffffff; | 590 energy_ = 0xffffffff; |
591 return *this; | 591 return *this; |
592 } | 592 } |
593 | 593 |
594 inline bool IsNewerSequenceNumber(uint16_t sequence_number, | 594 inline bool IsNewerSequenceNumber(uint16_t sequence_number, |
595 uint16_t prev_sequence_number) { | 595 uint16_t prev_sequence_number) { |
(...skipping 26 matching lines...) Expand all Loading... |
622 : sequence_number2; | 622 : sequence_number2; |
623 } | 623 } |
624 | 624 |
625 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { | 625 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { |
626 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; | 626 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; |
627 } | 627 } |
628 | 628 |
629 } // namespace webrtc | 629 } // namespace webrtc |
630 | 630 |
631 #endif // MODULE_COMMON_TYPES_H | 631 #endif // MODULE_COMMON_TYPES_H |
OLD | NEW |