webrtc/modules/include/module_common_types.h - Issue 2750783004: Add mute state field to AudioFrame.

Unified Diff: webrtc/modules/include/module_common_types.h

Issue 2750783004: Add mute state field to AudioFrame. (Closed)

Patch Set: Address review comments Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« webrtc/modules/audio_mixer/audio_frame_manipulator.cc ('K') | « webrtc/modules/audio_processing/test/audio_processing_simulator.cc ('k') | webrtc/tools/agc/activity_metric.cc » ('j') | webrtc/voice_engine/file_recorder.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/include/module_common_types.h

diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h

index 98f7a38af204fe32a17e7c5f84b9d205c203dc2a..0d4e1629e7a6d6e2eb9b48bb2443347ba142409d 100644

--- a/webrtc/modules/include/module_common_types.h

+++ b/webrtc/modules/include/module_common_types.h

@@ -271,17 +271,21 @@ class CallStatsObserver {

* states.

* Notes

- * - The total number of samples in |data_| is

- * samples_per_channel_ * num_channels_

- *

+ * - The total number of samples is samples_per_channel_ * num_channels_

* - Stereo data is interleaved starting with the left channel.

- *

class AudioFrame {

public:

- // Stereo, 32 kHz, 60 ms (2 * 32 * 60)

+ // Using constexpr here causes linker errors unless the variable also has an

+ // out-of-class definition, which is impractical in this header-only class.

+ // (This makes no sense because it compiles as an enum value, which we most

+ // certainly cannot take the address of, just fine.) C++17 introduces inline

+ // variables which should allow us to switch to constexpr and keep this a

+ // header-only class.

enum : size_t {

- kMaxDataSizeSamples = 3840

+ // Stereo, 32 kHz, 60 ms (2 * 32 * 60)

+ kMaxDataSizeSamples = 3840,

+ kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),

};

enum VADActivity {

@@ -299,8 +303,7 @@ class AudioFrame {

AudioFrame();

- // Resets all members to their default state (except does not modify the

- // contents of |data_|).

+ // Resets all members to their default state.

void Reset();

void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,

@@ -310,11 +313,21 @@ class AudioFrame {

void CopyFrom(const AudioFrame& src);

+ // data() returns a zeroed static buffer if the frame is muted.

+ // mutable_data() always returns a non-static buffer; the first call to

+ // mutable_data() zeros the non-static buffer and marks the frame unmuted.

+ const int16_t* data() const;

+ int16_t* mutable_data();

+ // Prefer to mute frames using AudioFrameOperations::Mute.

+ void Mute();

+ // Frame is muted by default.

+ bool muted() const;

// These methods are deprecated. Use the functions in

// webrtc/audio/utility instead. These methods will exist for a

// short period of time until webrtc clients have updated. See

// webrtc:6548 for details.

- RTC_DEPRECATED void Mute();

RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);

RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);

@@ -327,7 +340,6 @@ class AudioFrame {

// NTP time of the estimated capture time in local timebase in milliseconds.

// -1 represents an uninitialized value.

int64_t ntp_time_ms_ = -1;

- int16_t data_[kMaxDataSizeSamples];

size_t samples_per_channel_ = 0;

int sample_rate_hz_ = 0;

size_t num_channels_ = 0;

@@ -335,14 +347,23 @@ class AudioFrame {

VADActivity vad_activity_ = kVadUnknown;

private:

+ // A permanently zeroed out buffer to represent muted frames. This is a

+ // header-only class, so the only way to avoid creating a separate empty

+ // buffer per translation unit is to wrap a static in an inline function.

+ static const int16_t* empty_data() {

+ static const int16_t kEmptyData[kMaxDataSizeSamples] = {0};

+ static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes");

+ return kEmptyData;

+ }

+ int16_t data_[kMaxDataSizeSamples];

+ static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");

+ bool muted_ = true;

RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);

};

-// TODO(henrik.lundin) Can we remove the call to data_()?

-// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.

-inline AudioFrame::AudioFrame()

- : data_() {

+inline AudioFrame::AudioFrame() {}

inline void AudioFrame::Reset() {

id_ = -1;

@@ -351,6 +372,7 @@ inline void AudioFrame::Reset() {

timestamp_ = 0;

elapsed_time_ms_ = -1;

ntp_time_ms_ = -1;

+ muted_ = true;

samples_per_channel_ = 0;

sample_rate_hz_ = 0;

num_channels_ = 0;

@@ -376,10 +398,10 @@ inline void AudioFrame::UpdateFrame(int id,

const size_t length = samples_per_channel * num_channels;

assert(length <= kMaxDataSizeSamples);

- if (data != NULL) {

+ if (data != nullptr) {

memcpy(data_, data, sizeof(int16_t) * length);

} else {

- memset(data_, 0, sizeof(int16_t) * length);

+ muted_ = true;

}

@@ -390,6 +412,7 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) {

timestamp_ = src.timestamp_;

elapsed_time_ms_ = src.elapsed_time_ms_;

ntp_time_ms_ = src.ntp_time_ms_;

+ muted_ = src.muted();

samples_per_channel_ = src.samples_per_channel_;

sample_rate_hz_ = src.sample_rate_hz_;

speech_type_ = src.speech_type_;

@@ -398,16 +421,35 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) {

const size_t length = samples_per_channel_ * num_channels_;

assert(length <= kMaxDataSizeSamples);

- memcpy(data_, src.data_, sizeof(int16_t) * length);

+ if (!src.muted()) {

+ memcpy(data_, src.data(), sizeof(int16_t) * length);

+ }

+inline const int16_t* AudioFrame::data() const {

+ return muted_ ? empty_data() : data_;

+// TODO(henrik.lundin) Can we skip zeroing the buffer?

+// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.

+inline int16_t* AudioFrame::mutable_data() {

+ if (muted_) {

+ memset(data_, 0, size_t(kMaxDataSizeBytes));

+ muted_ = false;

+ }

+ return data_;

}

inline void AudioFrame::Mute() {

- memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));

+ muted_ = true;

}

+inline bool AudioFrame::muted() const { return muted_; }

inline AudioFrame& AudioFrame::operator>>=(const int rhs) {

assert((num_channels_ > 0) && (num_channels_ < 3));

if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

+ if (muted_) return *this;

for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {

data_[i] = static_cast<int16_t>(data_[i] >> rhs);

@@ -420,8 +462,9 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {

assert((num_channels_ > 0) && (num_channels_ < 3));

if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

if (num_channels_ != rhs.num_channels_) return *this;

+ if (rhs.muted()) return *this;

- bool noPrevData = false;

+ bool noPrevData = muted_;

if (samples_per_channel_ != rhs.samples_per_channel_) {

if (samples_per_channel_ == 0) {

// special case we have no data to start with

@@ -440,8 +483,9 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {

if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

+ muted_ = false;

if (noPrevData) {

- memcpy(data_, rhs.data_,

+ memcpy(data_, rhs.data(),

sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);

} else {

// IMPROVEMENT this can be done very fast in assembly