| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 335 * samples_per_channel_ * num_channels_ | 335 * samples_per_channel_ * num_channels_ |
| 336 * | 336 * |
| 337 * - Stereo data is interleaved starting with the left channel. | 337 * - Stereo data is interleaved starting with the left channel. |
| 338 * | 338 * |
| 339 * - The +operator assumes that you would never add exactly opposite frames when | 339 * - The +operator assumes that you would never add exactly opposite frames when |
| 340 * deciding the resulting state. To do this use the -operator. | 340 * deciding the resulting state. To do this use the -operator. |
| 341 */ | 341 */ |
| 342 class AudioFrame { | 342 class AudioFrame { |
| 343 public: | 343 public: |
| 344 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) | 344 // Stereo, 32 kHz, 60 ms (2 * 32 * 60) |
| 345 static const int kMaxDataSizeSamples = 3840; | 345 static const size_t kMaxDataSizeSamples = 3840; |
| 346 | 346 |
| 347 enum VADActivity { | 347 enum VADActivity { |
| 348 kVadActive = 0, | 348 kVadActive = 0, |
| 349 kVadPassive = 1, | 349 kVadPassive = 1, |
| 350 kVadUnknown = 2 | 350 kVadUnknown = 2 |
| 351 }; | 351 }; |
| 352 enum SpeechType { | 352 enum SpeechType { |
| 353 kNormalSpeech = 0, | 353 kNormalSpeech = 0, |
| 354 kPLC = 1, | 354 kPLC = 1, |
| 355 kCNG = 2, | 355 kCNG = 2, |
| 356 kPLCCNG = 3, | 356 kPLCCNG = 3, |
| 357 kUndefined = 4 | 357 kUndefined = 4 |
| 358 }; | 358 }; |
| 359 | 359 |
| 360 AudioFrame(); | 360 AudioFrame(); |
| 361 virtual ~AudioFrame() {} | 361 virtual ~AudioFrame() {} |
| 362 | 362 |
| 363 // Resets all members to their default state (except does not modify the | 363 // Resets all members to their default state (except does not modify the |
| 364 // contents of |data_|). | 364 // contents of |data_|). |
| 365 void Reset(); | 365 void Reset(); |
| 366 | 366 |
| 367 // |interleaved_| is not changed by this method. | 367 // |interleaved_| is not changed by this method. |
| 368 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, | 368 void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, |
| 369 int samples_per_channel, int sample_rate_hz, | 369 size_t samples_per_channel, int sample_rate_hz, |
| 370 SpeechType speech_type, VADActivity vad_activity, | 370 SpeechType speech_type, VADActivity vad_activity, |
| 371 int num_channels = 1, uint32_t energy = -1); | 371 int num_channels = 1, uint32_t energy = -1); |
| 372 | 372 |
| 373 AudioFrame& Append(const AudioFrame& rhs); | 373 AudioFrame& Append(const AudioFrame& rhs); |
| 374 | 374 |
| 375 void CopyFrom(const AudioFrame& src); | 375 void CopyFrom(const AudioFrame& src); |
| 376 | 376 |
| 377 void Mute(); | 377 void Mute(); |
| 378 | 378 |
| 379 AudioFrame& operator>>=(const int rhs); | 379 AudioFrame& operator>>=(const int rhs); |
| 380 AudioFrame& operator+=(const AudioFrame& rhs); | 380 AudioFrame& operator+=(const AudioFrame& rhs); |
| 381 AudioFrame& operator-=(const AudioFrame& rhs); | 381 AudioFrame& operator-=(const AudioFrame& rhs); |
| 382 | 382 |
| 383 int id_; | 383 int id_; |
| 384 // RTP timestamp of the first sample in the AudioFrame. | 384 // RTP timestamp of the first sample in the AudioFrame. |
| 385 uint32_t timestamp_; | 385 uint32_t timestamp_; |
| 386 // Time since the first frame in milliseconds. | 386 // Time since the first frame in milliseconds. |
| 387 // -1 represents an uninitialized value. | 387 // -1 represents an uninitialized value. |
| 388 int64_t elapsed_time_ms_; | 388 int64_t elapsed_time_ms_; |
| 389 // NTP time of the estimated capture time in local timebase in milliseconds. | 389 // NTP time of the estimated capture time in local timebase in milliseconds. |
| 390 // -1 represents an uninitialized value. | 390 // -1 represents an uninitialized value. |
| 391 int64_t ntp_time_ms_; | 391 int64_t ntp_time_ms_; |
| 392 int16_t data_[kMaxDataSizeSamples]; | 392 int16_t data_[kMaxDataSizeSamples]; |
| 393 int samples_per_channel_; | 393 size_t samples_per_channel_; |
| 394 int sample_rate_hz_; | 394 int sample_rate_hz_; |
| 395 int num_channels_; | 395 int num_channels_; |
| 396 SpeechType speech_type_; | 396 SpeechType speech_type_; |
| 397 VADActivity vad_activity_; | 397 VADActivity vad_activity_; |
| 398 // Note that there is no guarantee that |energy_| is correct. Any user of this | 398 // Note that there is no guarantee that |energy_| is correct. Any user of this |
| 399 // member must verify that the value is correct. | 399 // member must verify that the value is correct. |
| 400 // TODO(henrike) Remove |energy_|. | 400 // TODO(henrike) Remove |energy_|. |
| 401 // See https://code.google.com/p/webrtc/issues/detail?id=3315. | 401 // See https://code.google.com/p/webrtc/issues/detail?id=3315. |
| 402 uint32_t energy_; | 402 uint32_t energy_; |
| 403 bool interleaved_; | 403 bool interleaved_; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 423 num_channels_ = 0; | 423 num_channels_ = 0; |
| 424 speech_type_ = kUndefined; | 424 speech_type_ = kUndefined; |
| 425 vad_activity_ = kVadUnknown; | 425 vad_activity_ = kVadUnknown; |
| 426 energy_ = 0xffffffff; | 426 energy_ = 0xffffffff; |
| 427 interleaved_ = true; | 427 interleaved_ = true; |
| 428 } | 428 } |
| 429 | 429 |
| 430 inline void AudioFrame::UpdateFrame(int id, | 430 inline void AudioFrame::UpdateFrame(int id, |
| 431 uint32_t timestamp, | 431 uint32_t timestamp, |
| 432 const int16_t* data, | 432 const int16_t* data, |
| 433 int samples_per_channel, | 433 size_t samples_per_channel, |
| 434 int sample_rate_hz, | 434 int sample_rate_hz, |
| 435 SpeechType speech_type, | 435 SpeechType speech_type, |
| 436 VADActivity vad_activity, | 436 VADActivity vad_activity, |
| 437 int num_channels, | 437 int num_channels, |
| 438 uint32_t energy) { | 438 uint32_t energy) { |
| 439 id_ = id; | 439 id_ = id; |
| 440 timestamp_ = timestamp; | 440 timestamp_ = timestamp; |
| 441 samples_per_channel_ = samples_per_channel; | 441 samples_per_channel_ = samples_per_channel; |
| 442 sample_rate_hz_ = sample_rate_hz; | 442 sample_rate_hz_ = sample_rate_hz; |
| 443 speech_type_ = speech_type; | 443 speech_type_ = speech_type; |
| 444 vad_activity_ = vad_activity; | 444 vad_activity_ = vad_activity; |
| 445 num_channels_ = num_channels; | 445 num_channels_ = num_channels; |
| 446 energy_ = energy; | 446 energy_ = energy; |
| 447 | 447 |
| 448 assert(num_channels >= 0); | 448 assert(num_channels >= 0); |
| 449 const int length = samples_per_channel * num_channels; | 449 const size_t length = samples_per_channel * num_channels; |
| 450 assert(length <= kMaxDataSizeSamples); | 450 assert(length <= kMaxDataSizeSamples); |
| 451 if (data != NULL) { | 451 if (data != NULL) { |
| 452 memcpy(data_, data, sizeof(int16_t) * length); | 452 memcpy(data_, data, sizeof(int16_t) * length); |
| 453 } else { | 453 } else { |
| 454 memset(data_, 0, sizeof(int16_t) * length); | 454 memset(data_, 0, sizeof(int16_t) * length); |
| 455 } | 455 } |
| 456 } | 456 } |
| 457 | 457 |
| 458 inline void AudioFrame::CopyFrom(const AudioFrame& src) { | 458 inline void AudioFrame::CopyFrom(const AudioFrame& src) { |
| 459 if (this == &src) return; | 459 if (this == &src) return; |
| 460 | 460 |
| 461 id_ = src.id_; | 461 id_ = src.id_; |
| 462 timestamp_ = src.timestamp_; | 462 timestamp_ = src.timestamp_; |
| 463 elapsed_time_ms_ = src.elapsed_time_ms_; | 463 elapsed_time_ms_ = src.elapsed_time_ms_; |
| 464 ntp_time_ms_ = src.ntp_time_ms_; | 464 ntp_time_ms_ = src.ntp_time_ms_; |
| 465 samples_per_channel_ = src.samples_per_channel_; | 465 samples_per_channel_ = src.samples_per_channel_; |
| 466 sample_rate_hz_ = src.sample_rate_hz_; | 466 sample_rate_hz_ = src.sample_rate_hz_; |
| 467 speech_type_ = src.speech_type_; | 467 speech_type_ = src.speech_type_; |
| 468 vad_activity_ = src.vad_activity_; | 468 vad_activity_ = src.vad_activity_; |
| 469 num_channels_ = src.num_channels_; | 469 num_channels_ = src.num_channels_; |
| 470 energy_ = src.energy_; | 470 energy_ = src.energy_; |
| 471 interleaved_ = src.interleaved_; | 471 interleaved_ = src.interleaved_; |
| 472 | 472 |
| 473 assert(num_channels_ >= 0); | 473 assert(num_channels_ >= 0); |
| 474 const int length = samples_per_channel_ * num_channels_; | 474 const size_t length = samples_per_channel_ * num_channels_; |
| 475 assert(length <= kMaxDataSizeSamples); | 475 assert(length <= kMaxDataSizeSamples); |
| 476 memcpy(data_, src.data_, sizeof(int16_t) * length); | 476 memcpy(data_, src.data_, sizeof(int16_t) * length); |
| 477 } | 477 } |
| 478 | 478 |
| 479 inline void AudioFrame::Mute() { | 479 inline void AudioFrame::Mute() { |
| 480 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); | 480 memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); |
| 481 } | 481 } |
| 482 | 482 |
| 483 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { | 483 inline AudioFrame& AudioFrame::operator>>=(const int rhs) { |
| 484 assert((num_channels_ > 0) && (num_channels_ < 3)); | 484 assert((num_channels_ > 0) && (num_channels_ < 3)); |
| 485 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 485 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
| 486 | 486 |
| 487 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 487 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
| 488 data_[i] = static_cast<int16_t>(data_[i] >> rhs); | 488 data_[i] = static_cast<int16_t>(data_[i] >> rhs); |
| 489 } | 489 } |
| 490 return *this; | 490 return *this; |
| 491 } | 491 } |
| 492 | 492 |
| 493 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { | 493 inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { |
| 494 // Sanity check | 494 // Sanity check |
| 495 assert((num_channels_ > 0) && (num_channels_ < 3)); | 495 assert((num_channels_ > 0) && (num_channels_ < 3)); |
| 496 assert(interleaved_ == rhs.interleaved_); | 496 assert(interleaved_ == rhs.interleaved_); |
| 497 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 497 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
| 498 if (num_channels_ != rhs.num_channels_) return *this; | 498 if (num_channels_ != rhs.num_channels_) return *this; |
| 499 | 499 |
| 500 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { | 500 if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { |
| 501 vad_activity_ = kVadActive; | 501 vad_activity_ = kVadActive; |
| 502 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { | 502 } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { |
| 503 vad_activity_ = kVadUnknown; | 503 vad_activity_ = kVadUnknown; |
| 504 } | 504 } |
| 505 if (speech_type_ != rhs.speech_type_) { | 505 if (speech_type_ != rhs.speech_type_) { |
| 506 speech_type_ = kUndefined; | 506 speech_type_ = kUndefined; |
| 507 } | 507 } |
| 508 | 508 |
| 509 int offset = samples_per_channel_ * num_channels_; | 509 size_t offset = samples_per_channel_ * num_channels_; |
| 510 for (int i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { | 510 for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { |
| 511 data_[offset + i] = rhs.data_[i]; | 511 data_[offset + i] = rhs.data_[i]; |
| 512 } | 512 } |
| 513 samples_per_channel_ += rhs.samples_per_channel_; | 513 samples_per_channel_ += rhs.samples_per_channel_; |
| 514 return *this; | 514 return *this; |
| 515 } | 515 } |
| 516 | 516 |
| 517 namespace { | 517 namespace { |
| 518 inline int16_t ClampToInt16(int32_t input) { | 518 inline int16_t ClampToInt16(int32_t input) { |
| 519 if (input < -0x00008000) { | 519 if (input < -0x00008000) { |
| 520 return -0x8000; | 520 return -0x8000; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 550 vad_activity_ = kVadUnknown; | 550 vad_activity_ = kVadUnknown; |
| 551 } | 551 } |
| 552 | 552 |
| 553 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; | 553 if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; |
| 554 | 554 |
| 555 if (noPrevData) { | 555 if (noPrevData) { |
| 556 memcpy(data_, rhs.data_, | 556 memcpy(data_, rhs.data_, |
| 557 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); | 557 sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); |
| 558 } else { | 558 } else { |
| 559 // IMPROVEMENT this can be done very fast in assembly | 559 // IMPROVEMENT this can be done very fast in assembly |
| 560 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 560 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
| 561 int32_t wrap_guard = | 561 int32_t wrap_guard = |
| 562 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); | 562 static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); |
| 563 data_[i] = ClampToInt16(wrap_guard); | 563 data_[i] = ClampToInt16(wrap_guard); |
| 564 } | 564 } |
| 565 } | 565 } |
| 566 energy_ = 0xffffffff; | 566 energy_ = 0xffffffff; |
| 567 return *this; | 567 return *this; |
| 568 } | 568 } |
| 569 | 569 |
| 570 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { | 570 inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { |
| 571 // Sanity check | 571 // Sanity check |
| 572 assert((num_channels_ > 0) && (num_channels_ < 3)); | 572 assert((num_channels_ > 0) && (num_channels_ < 3)); |
| 573 assert(interleaved_ == rhs.interleaved_); | 573 assert(interleaved_ == rhs.interleaved_); |
| 574 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; | 574 if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; |
| 575 | 575 |
| 576 if ((samples_per_channel_ != rhs.samples_per_channel_) || | 576 if ((samples_per_channel_ != rhs.samples_per_channel_) || |
| 577 (num_channels_ != rhs.num_channels_)) { | 577 (num_channels_ != rhs.num_channels_)) { |
| 578 return *this; | 578 return *this; |
| 579 } | 579 } |
| 580 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { | 580 if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { |
| 581 vad_activity_ = kVadUnknown; | 581 vad_activity_ = kVadUnknown; |
| 582 } | 582 } |
| 583 speech_type_ = kUndefined; | 583 speech_type_ = kUndefined; |
| 584 | 584 |
| 585 for (int i = 0; i < samples_per_channel_ * num_channels_; i++) { | 585 for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { |
| 586 int32_t wrap_guard = | 586 int32_t wrap_guard = |
| 587 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); | 587 static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]); |
| 588 data_[i] = ClampToInt16(wrap_guard); | 588 data_[i] = ClampToInt16(wrap_guard); |
| 589 } | 589 } |
| 590 energy_ = 0xffffffff; | 590 energy_ = 0xffffffff; |
| 591 return *this; | 591 return *this; |
| 592 } | 592 } |
| 593 | 593 |
| 594 inline bool IsNewerSequenceNumber(uint16_t sequence_number, | 594 inline bool IsNewerSequenceNumber(uint16_t sequence_number, |
| 595 uint16_t prev_sequence_number) { | 595 uint16_t prev_sequence_number) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 622 : sequence_number2; | 622 : sequence_number2; |
| 623 } | 623 } |
| 624 | 624 |
| 625 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { | 625 inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) { |
| 626 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; | 626 return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2; |
| 627 } | 627 } |
| 628 | 628 |
| 629 } // namespace webrtc | 629 } // namespace webrtc |
| 630 | 630 |
| 631 #endif // MODULE_COMMON_TYPES_H | 631 #endif // MODULE_COMMON_TYPES_H |
| OLD | NEW |