webrtc/modules/audio_processing/include/audio_processing.h - Issue 1226093007: Allow more than 2 input channels in AudioProcessing.

Unified Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1226093007: Allow more than 2 input channels in AudioProcessing. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Fix mac build Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/audio_processing_impl.cc ('K') | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/include/mock_audio_processing.h » ('j') | webrtc/modules/modules.gyp » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/include/audio_processing.h

diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h

index 6fa1c96c0771c14d141836dc0c88bf69ec9f5aea..800aa1917538f7d83cbf921d6658f4000f31b97a 100644

--- a/webrtc/modules/audio_processing/include/audio_processing.h

+++ b/webrtc/modules/audio_processing/include/audio_processing.h

@@ -29,6 +29,9 @@ class AudioFrame;

template<typename T>

class Beamformer;

+class StreamConfig;

+struct ProcessingConfig;

class EchoCancellation;

class EchoControlMobile;

class GainControl;

@@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0;

#endif // defined(WEBRTC_CHROMIUM_BUILD)

struct ExperimentalAgc {

ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}

- ExperimentalAgc(bool enabled)

+ explicit ExperimentalAgc(bool enabled)

: enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}

ExperimentalAgc(bool enabled, int startup_min_volume)

: enabled(enabled), startup_min_volume(startup_min_volume) {}

@@ -236,10 +239,15 @@ class AudioProcessing {

// The int16 interfaces require:

// - only |NativeRate|s be used

// - that the input, output and reverse rates must match

- // - that |output_layout| matches |input_layout|

+ // - that |processing_config.output_stream()| matches

+ // |processing_config.input_stream()|.

- // The float interfaces accept arbitrary rates and support differing input

- // and output layouts, but the output may only remove channels, not add.

+ // The float interfaces accept arbitrary rates and support differing input and

+ // output layouts, but the output must have either one channel or the same

+ // number of channels as the input.

+ virtual int Initialize(const ProcessingConfig& processing_config) = 0;

+ // Initialize with unpacked parameters. See Initialize() above for details.

virtual int Initialize(int input_sample_rate_hz,

int output_sample_rate_hz,

int reverse_sample_rate_hz,

@@ -292,8 +300,8 @@ class AudioProcessing {

// |input_layout|. At output, the channels will be arranged according to

// |output_layout| at |output_sample_rate_hz| in |dest|.

- // The output layout may only remove channels, not add. |src| and |dest|

- // may use the same memory, if desired.

+ // The output layout must have one channel or as many channels as the input.

+ // |src| and |dest| may use the same memory, if desired.

virtual int ProcessStream(const float* const* src,

int samples_per_channel,

int input_sample_rate_hz,

@@ -302,6 +310,17 @@ class AudioProcessing {

ChannelLayout output_layout,

float* const* dest) = 0;

+ // Accepts deinterleaved float audio with the range [-1, 1]. Each element of

+ // |src| points to a channel buffer, arranged according to

+ // |processing_config.input_stream()|. At output, the channels will be

+ // arranged according to |processing_config.output_stream()| in |dest|.

+ //

+ // The output must have one channel or as many channels as the input. |src|

+ // and |dest| may use the same memory, if desired.

+ virtual int ProcessStream(const float* const* src,

aluebs-webrtc 2015/07/14 23:12:44 The idea is to migrate to this interface and get r

mgraczyk 2015/07/15 01:12:47 Yes, I am hoping that is the end result. With a "

aluebs-webrtc 2015/07/15 18:04:06 Agreed.

+ const ProcessingConfig& processing_config,

aluebs-webrtc 2015/07/14 23:12:43 Here you are passing in a ReverseStream as well. Y

mgraczyk 2015/07/15 01:12:47 That's true, but this makes the interface more con

aluebs-webrtc 2015/07/15 18:04:06 I personally prefer to have 2 StreamConfigs to be

mgraczyk 2015/07/15 20:03:20 They are all used. If the reverse stream configur

aluebs-webrtc 2015/07/15 21:29:17 But before you could only change the reverse strea

mgraczyk 2015/07/15 21:53:56 Calls to ProcessStream didn't change the reverse s

Calls to ProcessStream didn't change the reverse stream config, but they did reset its buffers and clear the current state when the forward stream config changed. Previously, the old reverse config was reused implicitly. Isn't it better to require clients to explicitly specify a new config when the reverse stream is reset rather than just using the old one?

Clients will not have to specify a new config or use defaults for the reverse stream. They will use the same ProcessingConfig object they used to create APM on calls to ProcessStream and AnalyzeReverseStream. I think calling code will look like:

    apm->Initialize(my_config_);
    ...
    apm->ProcessStream(my_input_, my_config_, my_output);
    ...
    apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream());

Compare that to:

    apm->Initialize(my_config_);
    ...
    apm->ProcessStream(my_input_, my_config_->input_stream(),
                       my_config_->output_stream(), my_output);
    ...
    apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream());

I'll change it if you really want me to.

aluebs-webrtc 2015/07/16 00:20:52 Personally I find the last one more consistent (Pr

On 2015/07/15 21:53:56, mgraczyk wrote: > On 2015/07/15 21:29:17, aluebs-webrtc wrote: > > On 2015/07/15 20:03:20, mgraczyk wrote: > > > On 2015/07/15 18:04:06, aluebs-webrtc wrote: > > > > On 2015/07/15 01:12:47, mgraczyk wrote: > > > > > On 2015/07/14 23:12:43, aluebs-webrtc wrote: > > > > > > Here you are passing in a ReverseStream as well. You could probably > just > > > > drop > > > > > > the ProcessingConfig and pass in 2 StreamConfigs instead. > > > > > > > > > > That's true, but this makes the interface more consistent and easier to > > > > change. > > > > > > > > > > I'll do whatever you want here. > > > > > > > > I personally prefer to have 2 StreamConfigs to be clear to the user which > > > > parameters are actually going to be used and to be consistent with the > > reverse > > > > stream. > > > > > > They are all used. If the reverse stream configuration changes here, then > APM > > > will be reinitialized with a different reverse stream. > > > > > > The point is that if any part of the config changes, the whole thing gets > > wiped > > > and reinitialized. That's how it was before, except it was less obvious > > > because the caller didn't pass in information about the reverse stream. It > > seems > > > like passing the reverse config here makes that more clear, not less. > > > > But before you could only change the reverse stream when calling the reverse > > stream and it would be reinitialized then. I find that clearer, than having to > > specify (or use defaults) for a stream I am not calling at the moment. > > Calls to ProcessStream didn't change the reverse stream config, but they did > reset its buffers and clear the current state when the forward stream config > changed. Previously, the old reverse config was reused implicitly. Isn't it > better to require clients to explicitly specify a new config when the reverse > stream is reset rather than just using the old one? 
> > Clients will not have to specify new config or use defaults for the reverse > stream. They will use the same ProcessingConfig object they used to create APM > on calls to ProcessStream and AnalyzeReverseStream. I think calling code will > look like: > > > apm->Initialize(my_config_); > ... > apm->ProcessStream(my_input_, my_config_, my_output); > ... > apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream()); > > > Compare that to > apm->Initialize(my_config_); > ... > > apm->ProcessStream(my_input_, my_config_->input_stream(), > my_config_->output_stream(), my_output); > ... > apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream()); > > > I'll change it if you really want me to.

Personally I find the last one more consistent (ProcessStream and AnalyzeReverseStream are the same) and clearer. If you know you are going to reset it, then you can always call Initialize again with the whole config; otherwise you only pass in what is needed. For example, an application that has no reverse stream never has to create a config (not even the default one) for it.

mgraczyk 2015/07/16 00:50:17 I'll change it, but just so we're clear: ProcessSt

On 2015/07/16 00:20:52, aluebs-webrtc wrote: > On 2015/07/15 21:53:56, mgraczyk wrote: > > On 2015/07/15 21:29:17, aluebs-webrtc wrote: > > > On 2015/07/15 20:03:20, mgraczyk wrote: > > > > On 2015/07/15 18:04:06, aluebs-webrtc wrote: > > > > > On 2015/07/15 01:12:47, mgraczyk wrote: > > > > > > On 2015/07/14 23:12:43, aluebs-webrtc wrote: > > > > > > > Here you are passing in a ReverseStream as well. You could probably > > just > > > > > drop > > > > > > > the ProcessingConfig and pass in 2 StreamConfigs instead. > > > > > > > > > > > > That's true, but this makes the interface more consistent and easier > to > > > > > change. > > > > > > > > > > > > I'll do whatever you want here. > > > > > > > > > > I personally prefer to have 2 StreamConfigs to be clear to the user > which > > > > > parameters are actually going to be used and to be consistent with the > > > reverse > > > > > stream. > > > > > > > > They are all used. If the reverse stream configuration changes here, then > > APM > > > > will be reinitialized with a different reverse stream. > > > > > > > > The point is that if any part of the config changes, the whole thing gets > > > wiped > > > > and reinitialized. That's how it was before, except it was less obvious > > > > because the caller didn't pass in information about the reverse stream. It > > > seems > > > > like passing the reverse config here makes that more clear, not less. > > > > > > But before you could only change the reverse stream when calling the reverse > > > stream and it would be reinitialized then. I find that clearer, than having > to > > > specify (or use defaults) for a stream I am not calling at the moment. > > > > Calls to ProcessStream didn't change the reverse stream config, but they did > > reset its buffers and clear the current state when the forward stream config > > changed. Previously, the old reverse config was reused implicitly. 
Isn't it > > better to require clients to explicitly specify a new config when the reverse > > stream is reset rather than just using the old one? > > > > Clients will not have to specify new config or use defaults for the reverse > > stream. They will use the same ProcessingConfig object they used to create APM > > on calls to ProcessStream and AnalyzeReverseStream. I think calling code will > > look like: > > > > > > apm->Initialize(my_config_); > > ... > > apm->ProcessStream(my_input_, my_config_, my_output); > > ... > > apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream()); > > > > > > Compare that to > > apm->Initialize(my_config_); > > ... > > > > apm->ProcessStream(my_input_, my_config_->input_stream(), > > my_config_->output_stream(), my_output); > > ... > > apm->AnalyzeReverseStream(my_reverse_data_, my_config_->reverse_stream()); > > > > > > I'll change it if you really want me to. > > Personally I find the last one more consistent (ProcessStream and > AnalyzeReverseStream are the same) and clear. If you know you are going to reset > it, then you can always call Initialize again with the whole config, else you > only pass in what is needed. For example, an application that has no reverse > stream never has to create a config (not even the default one) for it.

I'll change it, but just so we're clear: ProcessStream does reset the reverse stream, so the client would not need to call Initialize() again with the whole config. Not only that, but if we're going to only pass "what is needed", we shouldn't be passing any config here. The client should call Initialize() to reinitialize with a different config.

+ float* const* dest) = 0;

// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame

// will not be modified. On the client-side, this is the far-end (or to be

// rendered) audio.

@@ -326,6 +345,11 @@ class AudioProcessing {

int sample_rate_hz,

ChannelLayout layout) = 0;

+ // Accepts deinterleaved float audio with the range [-1, 1]. Each element of

+ // |data| points to a channel buffer, arranged according to |reverse_config|.

+ virtual int AnalyzeReverseStream(const float* const* data,

aluebs-webrtc 2015/07/14 23:12:44 Same comment about adding yet another interface.

mgraczyk 2015/07/15 01:12:47 Acknowledged.

+ const StreamConfig& reverse_config) = 0;

// This must be called if and only if echo processing is enabled.

// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end

@@ -432,6 +456,91 @@ class AudioProcessing {

static const int kChunkSizeMs = 10;

};

+class StreamConfig {

+ public:

+ StreamConfig(int sample_rate_hz = 0,

+ int num_channels = 0,

+ bool has_keyboard = false)

+ : sample_rate_hz_(sample_rate_hz),

+ num_channels_(num_channels),

+ has_keyboard_(has_keyboard),

+ samples_per_channel_(calculate_samples_per_channel(sample_rate_hz)) {}

+ void set_sample_rate_hz(int value) {

+ sample_rate_hz_ = value;

+ samples_per_channel_ = calculate_samples_per_channel(value);

+ }

+ void set_num_channels(int value) { num_channels_ = value; }

+ void set_has_keyboard(bool value) { has_keyboard_ = value; }

+ int sample_rate_hz() const { return sample_rate_hz_; }

+ int num_channels() const { return num_channels_; }

+ bool has_keyboard() const { return has_keyboard_; }

+ int samples_per_channel() const { return samples_per_channel_; }

+ bool operator==(const StreamConfig& other) const {

+ return sample_rate_hz_ == other.sample_rate_hz_ &&

+ num_channels_ == other.num_channels_ &&

+ has_keyboard_ == other.has_keyboard_;

+ }

+ bool operator!=(const StreamConfig& other) const { return !(*this == other); }

+ private:

+ static int calculate_samples_per_channel(int sample_rate_hz) {

+ return sample_rate_hz * (AudioProcessing::kChunkSizeMs / 1000.0);

aluebs-webrtc 2015/07/14 23:12:43 Why do you need 1000.0 if you cast it to int anywa

mgraczyk 2015/07/15 01:12:47 I guess if all our sampling rates are divisible by

aluebs-webrtc 2015/07/15 18:04:06 If our sample rates are not divisible by 100 we ha

mgraczyk 2015/07/15 20:03:19 Yeah, that makes sense.

+ }

+ int sample_rate_hz_;

+ int num_channels_;

+ bool has_keyboard_;

+ int samples_per_channel_;

+};

+struct ProcessingConfig {

aluebs-webrtc 2015/07/14 23:12:44 Why not a class?

mgraczyk 2015/07/15 01:12:47 Done.

+ enum StreamName {

+ kInputStream,

+ kOutputStream,

+ kReverseStream,

+ kNumStreamNames,

+ };

+ const StreamConfig& input_stream() const {

+ return streams[StreamName::kInputStream];

+ }

+ const StreamConfig& output_stream() const {

+ return streams[StreamName::kOutputStream];

+ }

+ const StreamConfig& reverse_stream() const {

+ return streams[StreamName::kReverseStream];

+ }

+ StreamConfig& input_stream() {

+ return streams[StreamName::kInputStream];

+ }

+ StreamConfig& output_stream() {

+ return streams[StreamName::kOutputStream];

+ }

+ StreamConfig& reverse_stream() {

+ return streams[StreamName::kReverseStream];

+ }

+ bool operator==(const ProcessingConfig& other) const {

+ for (int i = 0; i < StreamName::kNumStreamNames; ++i) {

+ if (this->streams[i] != other.streams[i]) {

+ return false;

+ }

+ }

+ return true;

+ }

+ bool operator!=(const ProcessingConfig& other) const {

+ return !(*this == other);

+ }

+ StreamConfig streams[StreamName::kNumStreamNames];

+};

// The acoustic echo cancellation (AEC) component provides better performance

// than AECM but also requires more processing power and is dependent on delay

// stability and reporting accuracy. As such it is well-suited and recommended