Index: webrtc/modules/audio_processing/include/audio_processing.h
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 6fa1c96c0771c14d141836dc0c88bf69ec9f5aea..6a8ef1b0e51e194f436557eb4e5d3f67597b45da 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -29,6 +29,9 @@ class AudioFrame;
 template<typename T>
 class Beamformer;
 
+class StreamConfig;
+class ProcessingConfig;
+
 class EchoCancellation;
 class EchoControlMobile;
 class GainControl;
@@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0;
 #endif  // defined(WEBRTC_CHROMIUM_BUILD)
 struct ExperimentalAgc {
   ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
-  ExperimentalAgc(bool enabled)
+  explicit ExperimentalAgc(bool enabled)
       : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
   ExperimentalAgc(bool enabled, int startup_min_volume)
       : enabled(enabled), startup_min_volume(startup_min_volume) {}
@@ -199,6 +202,7 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000;
 //
 class AudioProcessing {
  public:
+  // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
   enum ChannelLayout {
     kMono,
     // Left, right.
@@ -236,10 +240,17 @@ class AudioProcessing {
   // The int16 interfaces require:
   //   - only |NativeRate|s be used
   //   - that the input, output and reverse rates must match
-  //   - that |output_layout| matches |input_layout|
+  //   - that |processing_config.output_stream()| matches
+  //     |processing_config.input_stream()|.
+  //
+  // The float interfaces accept arbitrary rates and support differing input and
+  // output layouts, but the output must have either one channel or the same
+  // number of channels as the input.
+  virtual int Initialize(const ProcessingConfig& processing_config) = 0;
+
+  // Initialize with unpacked parameters. See Initialize() above for details.
   //
-  // The float interfaces accept arbitrary rates and support differing input
-  // and output layouts, but the output may only remove channels, not add.
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int Initialize(int input_sample_rate_hz,
                          int output_sample_rate_hz,
                          int reverse_sample_rate_hz,
@@ -292,8 +303,10 @@ class AudioProcessing {
   // |input_layout|. At output, the channels will be arranged according to
   // |output_layout| at |output_sample_rate_hz| in |dest|.
   //
-  // The output layout may only remove channels, not add. |src| and |dest|
-  // may use the same memory, if desired.
+  // The output layout must have one channel or as many channels as the input.
+  // |src| and |dest| may use the same memory, if desired.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int ProcessStream(const float* const* src,
                             int samples_per_channel,
                             int input_sample_rate_hz,
@@ -302,6 +315,18 @@ class AudioProcessing {
                             ChannelLayout output_layout,
                             float* const* dest) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // |src| points to a channel buffer, arranged according to |input_stream|. At
+  // output, the channels will be arranged according to |output_stream| in
+  // |dest|.
+  //
+  // The output must have one channel or as many channels as the input. |src|
+  // and |dest| may use the same memory, if desired.
+  virtual int ProcessStream(const float* const* src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            float* const* dest) = 0;
+
   // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
   // will not be modified. On the client-side, this is the far-end (or to be
   // rendered) audio.
@@ -321,11 +346,18 @@
 
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element
   // of |data| points to a channel buffer, arranged according to |layout|.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int AnalyzeReverseStream(const float* const* data,
                                    int samples_per_channel,
                                    int sample_rate_hz,
                                    ChannelLayout layout) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // |data| points to a channel buffer, arranged according to |reverse_config|.
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   const StreamConfig& reverse_config) = 0;
+
   // This must be called if and only if echo processing is enabled.
   //
   // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@@ -432,6 +464,102 @@ class AudioProcessing {
   static const int kChunkSizeMs = 10;
 };
 
+class StreamConfig {
+ public:
+  // sample_rate_hz: The sampling rate of the stream.
+  //
+  // num_channels: The number of audio channels in the stream, excluding the
+  //               keyboard channel if it is present. When passing a
+  //               StreamConfig with an array of arrays T*[N],
+  //
+  //                N == {num_channels + 1  if  has_keyboard
+  //                     {num_channels      if  !has_keyboard
+  //
+  // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
+  //               is true, the last channel in any corresponding list of
+  //               channels is the keyboard channel.
+  StreamConfig(int sample_rate_hz = 0,
+               int num_channels = 0,
+               bool has_keyboard = false)
+      : sample_rate_hz_(sample_rate_hz),
+        num_channels_(num_channels),
+        has_keyboard_(has_keyboard),
+        num_frames_(calculate_frames(sample_rate_hz)) {}
+
+  void set_sample_rate_hz(int value) {
+    sample_rate_hz_ = value;
+    num_frames_ = calculate_frames(value);
+  }
+  void set_num_channels(int value) { num_channels_ = value; }
+  void set_has_keyboard(bool value) { has_keyboard_ = value; }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+
+  // The number of channels in the stream, not including the keyboard channel if
+  // present.
+  int num_channels() const { return num_channels_; }
+
+  bool has_keyboard() const { return has_keyboard_; }
+  int num_frames() const { return num_frames_; }
+
+  bool operator==(const StreamConfig& other) const {
+    return sample_rate_hz_ == other.sample_rate_hz_ &&
+           num_channels_ == other.num_channels_ &&
+           has_keyboard_ == other.has_keyboard_;
+  }
+
+  bool operator!=(const StreamConfig& other) const { return !(*this == other); }
+
+ private:
+  static int calculate_frames(int sample_rate_hz) {
+    return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
+  }
+
+  int sample_rate_hz_;
+  int num_channels_;
+  bool has_keyboard_;
+  int num_frames_;
+};
+
+class ProcessingConfig {
+ public:
+  enum StreamName {
+    kInputStream,
+    kOutputStream,
+    kReverseStream,
+    kNumStreamNames,
+  };
+
+  const StreamConfig& input_stream() const {
+    return streams[StreamName::kInputStream];
+  }
+  const StreamConfig& output_stream() const {
+    return streams[StreamName::kOutputStream];
+  }
+  const StreamConfig& reverse_stream() const {
+    return streams[StreamName::kReverseStream];
+  }
+
+  StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
+  StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
+  StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
+
+  bool operator==(const ProcessingConfig& other) const {
+    for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
+      if (this->streams[i] != other.streams[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const ProcessingConfig& other) const {
+    return !(*this == other);
+  }
+
+  StreamConfig streams[StreamName::kNumStreamNames];
+};
+
 // The acoustic echo cancellation (AEC) component provides better performance
 // than AECM but also requires more processing power and is dependent on delay
 // stability and reporting accuracy. As such it is well-suited and recommended
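
Usage sketch (not part of the patch above): the snippet below shows how a client might drive the new StreamConfig/ProcessingConfig overloads introduced by this change. It assumes an already-created webrtc::AudioProcessing instance, caller-owned deinterleaved float buffers holding one 10 ms chunk per call, and the kNoError error code defined elsewhere in audio_processing.h. The function name, its parameters, and the 48 kHz/stereo figures are illustrative only; echo-delay reporting and per-component configuration are omitted.

// Example (not part of audio_processing.h): processing one 10 ms chunk with
// the new StreamConfig/ProcessingConfig overloads.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

int ProcessOneChunk(webrtc::AudioProcessing* apm,
                    const float* const* capture,   // near-end input channels
                    float* const* capture_out,     // near-end output channels
                    const float* const* render) {  // far-end channels
  // 48 kHz stereo capture downmixed to a mono output, 48 kHz stereo render.
  // No keyboard channel in any stream, so each channel array holds exactly
  // num_channels() pointers, each to num_frames() samples in [-1, 1].
  webrtc::StreamConfig input_config(48000, 2, false);
  webrtc::StreamConfig output_config(48000, 1, false);
  webrtc::StreamConfig reverse_config(48000, 2, false);

  // Pack the three stream descriptions and apply them up front.
  webrtc::ProcessingConfig processing_config;
  processing_config.input_stream() = input_config;
  processing_config.output_stream() = output_config;
  processing_config.reverse_stream() = reverse_config;
  int err = apm->Initialize(processing_config);
  if (err != webrtc::AudioProcessing::kNoError)
    return err;

  // Far-end (render) audio is only analyzed, never modified.
  err = apm->AnalyzeReverseStream(render, reverse_config);
  if (err != webrtc::AudioProcessing::kNoError)
    return err;

  // Near-end (capture) audio is processed into |capture_out|; per the header
  // comments, |capture| and |capture_out| may also use the same memory.
  return apm->ProcessStream(capture, input_config, output_config, capture_out);
}

Per the comments in the patch, output_config may specify either one channel or the same channel count as input_config. StreamConfig::num_frames() is derived from AudioProcessing::kChunkSizeMs, so a 48000 Hz stream corresponds to 480 samples per channel per call.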