webrtc/modules/audio_processing/include/audio_processing.h - Issue 1226093007: Allow more than 2 input channels in AudioProcessing.

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1226093007: Allow more than 2 input channels in AudioProcessing. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Address Comments Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/include/mock_audio_processing.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 11 matching lines...) Expand all Loading...
22	22

23 struct AecCore;	23 struct AecCore;

24	24

25 namespace webrtc {	25 namespace webrtc {

26	26

27 class AudioFrame;	27 class AudioFrame;

28	28

29 template<typename T>	29 template<typename T>

30 class Beamformer;	30 class Beamformer;

31	31

	32 class StreamConfig;

	33 class ProcessingConfig;

	34

32 class EchoCancellation;	35 class EchoCancellation;

33 class EchoControlMobile;	36 class EchoControlMobile;

34 class GainControl;	37 class GainControl;

35 class HighPassFilter;	38 class HighPassFilter;

36 class LevelEstimator;	39 class LevelEstimator;

37 class NoiseSuppression;	40 class NoiseSuppression;

38 class VoiceDetection;	41 class VoiceDetection;

39	42

40 // Use to enable the extended filter mode in the AEC, along with robustness	43 // Use to enable the extended filter mode in the AEC, along with robustness

41 // measures around the reported system delays. It comes with a significant	44 // measures around the reported system delays. It comes with a significant

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 // [12, 255]. Here, 255 maps to 100%.	80 // [12, 255]. Here, 255 maps to 100%.

78 //	81 //

79 // Must be provided through AudioProcessing::Create(Confg&).	82 // Must be provided through AudioProcessing::Create(Confg&).

80 #if defined(WEBRTC_CHROMIUM_BUILD)	83 #if defined(WEBRTC_CHROMIUM_BUILD)

81 static const int kAgcStartupMinVolume = 85;	84 static const int kAgcStartupMinVolume = 85;

82 #else	85 #else

83 static const int kAgcStartupMinVolume = 0;	86 static const int kAgcStartupMinVolume = 0;

84 #endif // defined(WEBRTC_CHROMIUM_BUILD)	87 #endif // defined(WEBRTC_CHROMIUM_BUILD)

85 struct ExperimentalAgc {	88 struct ExperimentalAgc {

86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}	89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}

87 ExperimentalAgc(bool enabled)	90 explicit ExperimentalAgc(bool enabled)

88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}	91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}

89 ExperimentalAgc(bool enabled, int startup_min_volume)	92 ExperimentalAgc(bool enabled, int startup_min_volume)

90 : enabled(enabled), startup_min_volume(startup_min_volume) {}	93 : enabled(enabled), startup_min_volume(startup_min_volume) {}

91 bool enabled;	94 bool enabled;

92 int startup_min_volume;	95 int startup_min_volume;

93 };	96 };

94	97

95 // Use to enable experimental noise suppression. It can be set in the	98 // Use to enable experimental noise suppression. It can be set in the

96 // constructor or using AudioProcessing::SetExtraOptions().	99 // constructor or using AudioProcessing::SetExtraOptions().

97 struct ExperimentalNs {	100 struct ExperimentalNs {

(...skipping 131 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
229 //	232 //

230 // It is also not necessary to call if the audio parameters (sample	233 // It is also not necessary to call if the audio parameters (sample

231 // rate and number of channels) have changed. Passing updated parameters	234 // rate and number of channels) have changed. Passing updated parameters

232 // directly to \|ProcessStream()\| and \|AnalyzeReverseStream()\| is permissible.	235 // directly to \|ProcessStream()\| and \|AnalyzeReverseStream()\| is permissible.

233 // If the parameters are known at init-time though, they may be provided.	236 // If the parameters are known at init-time though, they may be provided.

234 virtual int Initialize() = 0;	237 virtual int Initialize() = 0;

235	238

236 // The int16 interfaces require:	239 // The int16 interfaces require:

237 // - only \|NativeRate\|s be used	240 // - only \|NativeRate\|s be used

238 // - that the input, output and reverse rates must match	241 // - that the input, output and reverse rates must match

239 // - that \|output_layout\| matches \|input_layout\|	242 // - that \|processing_config.output_stream()\| matches

	243 // \|processing_config.input_stream()\|.

240 //	244 //

241 // The float interfaces accept arbitrary rates and support differing input	245 // The float interfaces accept arbitrary rates and support differing input and

242 // and output layouts, but the output may only remove channels, not add.	246 // output layouts, but the output must have either one channel or the same

	247 // number of channels as the input.

	248 virtual int Initialize(const ProcessingConfig& processing_config) = 0;

	249

	250 // Initialize with unpacked parameters. See Initialize() above for details.

243 virtual int Initialize(int input_sample_rate_hz,	251 virtual int Initialize(int input_sample_rate_hz,

244 int output_sample_rate_hz,	252 int output_sample_rate_hz,

245 int reverse_sample_rate_hz,	253 int reverse_sample_rate_hz,

246 ChannelLayout input_layout,	254 ChannelLayout input_layout,

247 ChannelLayout output_layout,	255 ChannelLayout output_layout,

248 ChannelLayout reverse_layout) = 0;	256 ChannelLayout reverse_layout) = 0;

249	257

250 // Pass down additional options which don't have explicit setters. This	258 // Pass down additional options which don't have explicit setters. This

251 // ensures the options are applied immediately.	259 // ensures the options are applied immediately.

252 virtual void SetExtraOptions(const Config& config) = 0;	260 virtual void SetExtraOptions(const Config& config) = 0;

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
285 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|	293 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|

286 // members of \|frame\| must be valid. If changed from the previous call to this	294 // members of \|frame\| must be valid. If changed from the previous call to this

287 // method, it will trigger an initialization.	295 // method, it will trigger an initialization.

288 virtual int ProcessStream(AudioFrame* frame) = 0;	296 virtual int ProcessStream(AudioFrame* frame) = 0;

289	297

290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element	298 // Accepts deinterleaved float audio with the range [-1, 1]. Each element

291 // of \|src\| points to a channel buffer, arranged according to	299 // of \|src\| points to a channel buffer, arranged according to

292 // \|input_layout\|. At output, the channels will be arranged according to	300 // \|input_layout\|. At output, the channels will be arranged according to

293 // \|output_layout\| at \|output_sample_rate_hz\| in \|dest\|.	301 // \|output_layout\| at \|output_sample_rate_hz\| in \|dest\|.

294 //	302 //

295 // The output layout may only remove channels, not add. \|src\| and \|dest\|	303 // The output layout must have one channel or as many channels as the input.

296 // may use the same memory, if desired.	304 // \|src\| and \|dest\| may use the same memory, if desired.

297 virtual int ProcessStream(const float* const* src,	305 virtual int ProcessStream(const float* const* src,
	ajm 2015/07/15 05:21:20: Can you add a TODO here and on the deprecated AnalyzeReverseStream to remove when clients are updated?
	mgraczyk 2015/07/15 20:03:20 (in reply to ajm): Done.
298 int samples_per_channel,	306 int samples_per_channel,

299 int input_sample_rate_hz,	307 int input_sample_rate_hz,

300 ChannelLayout input_layout,	308 ChannelLayout input_layout,

301 int output_sample_rate_hz,	309 int output_sample_rate_hz,

302 ChannelLayout output_layout,	310 ChannelLayout output_layout,

303 float* const* dest) = 0;	311 float* const* dest) = 0;

304	312

	313 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of

	314 // \|src\| points to a channel buffer, arranged according to

	315 // \|processing_config.input_stream()\|. At output, the channels will be

	316 // arranged according to \|processing_config.output_stream()\| in \|dest\|.

	317 //

	318 // The output must have one channel or as many channels as the input. \|src\|

	319 // and \|dest\| may use the same memory, if desired.

	320 virtual int ProcessStream(const float* const* src,

	321 const ProcessingConfig& processing_config,

	322 float* const* dest) = 0;

	323

305 // Analyzes a 10 ms \|frame\| of the reverse direction audio stream. The frame	324 // Analyzes a 10 ms \|frame\| of the reverse direction audio stream. The frame

306 // will not be modified. On the client-side, this is the far-end (or to be	325 // will not be modified. On the client-side, this is the far-end (or to be

307 // rendered) audio.	326 // rendered) audio.

308 //	327 //

309 // It is only necessary to provide this if echo processing is enabled, as the	328 // It is only necessary to provide this if echo processing is enabled, as the

310 // reverse stream forms the echo reference signal. It is recommended, but not	329 // reverse stream forms the echo reference signal. It is recommended, but not

311 // necessary, to provide if gain control is enabled. On the server-side this	330 // necessary, to provide if gain control is enabled. On the server-side this

312 // typically will not be used. If you're not sure what to pass in here,	331 // typically will not be used. If you're not sure what to pass in here,

313 // chances are you don't need to use it.	332 // chances are you don't need to use it.

314 //	333 //

315 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|	334 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|

316 // members of \|frame\| must be valid. \|sample_rate_hz_\| must correspond to	335 // members of \|frame\| must be valid. \|sample_rate_hz_\| must correspond to

317 // \|input_sample_rate_hz()\|	336 // \|input_sample_rate_hz()\|

318 //	337 //

319 // TODO(ajm): add const to input; requires an implementation fix.	338 // TODO(ajm): add const to input; requires an implementation fix.

320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;	339 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;

321	340

322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element	341 // Accepts deinterleaved float audio with the range [-1, 1]. Each element

323 // of \|data\| points to a channel buffer, arranged according to \|layout\|.	342 // of \|data\| points to a channel buffer, arranged according to \|layout\|.

324 virtual int AnalyzeReverseStream(const float* const* data,	343 virtual int AnalyzeReverseStream(const float* const* data,

325 int samples_per_channel,	344 int samples_per_channel,

326 int sample_rate_hz,	345 int sample_rate_hz,

327 ChannelLayout layout) = 0;	346 ChannelLayout layout) = 0;

328	347

	348 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of

	349 // \|data\| points to a channel buffer, arranged according to \|reverse_config\|.

	350 virtual int AnalyzeReverseStream(const float* const* data,

	351 const StreamConfig& reverse_config) = 0;

	352

329 // This must be called if and only if echo processing is enabled.	353 // This must be called if and only if echo processing is enabled.

330 //	354 //

331 // Sets the \|delay\| in ms between AnalyzeReverseStream() receiving a far-end	355 // Sets the \|delay\| in ms between AnalyzeReverseStream() receiving a far-end

332 // frame and ProcessStream() receiving a near-end frame containing the	356 // frame and ProcessStream() receiving a near-end frame containing the

333 // corresponding echo. On the client-side this can be expressed as	357 // corresponding echo. On the client-side this can be expressed as

334 // delay = (t_render - t_analyze) + (t_process - t_capture)	358 // delay = (t_render - t_analyze) + (t_process - t_capture)

335 // where,	359 // where,

336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and	360 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and

337 // t_render is the time the first sample of the same frame is rendered by	361 // t_render is the time the first sample of the same frame is rendered by

338 // the audio hardware.	362 // the audio hardware.

(...skipping 86 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
425 enum NativeRate {	449 enum NativeRate {

426 kSampleRate8kHz = 8000,	450 kSampleRate8kHz = 8000,

427 kSampleRate16kHz = 16000,	451 kSampleRate16kHz = 16000,

428 kSampleRate32kHz = 32000,	452 kSampleRate32kHz = 32000,

429 kSampleRate48kHz = 48000	453 kSampleRate48kHz = 48000

430 };	454 };

431	455

432 static const int kChunkSizeMs = 10;	456 static const int kChunkSizeMs = 10;

433 };	457 };

434	458

	459 class StreamConfig {

	460 public:

	461 StreamConfig(int sample_rate_hz = 0,

	462 int num_channels = 0,

	463 bool has_keyboard = false)

	464 : sample_rate_hz_(sample_rate_hz),

	465 num_channels_(num_channels),

	466 has_keyboard_(has_keyboard),

	467 samples_per_channel_(calculate_samples_per_channel(sample_rate_hz)) {}
	ajm 2015/07/15 05:21:20: Alex, Michael: I know it breaks convention in this module, but what do you think about using num_frames_ to represent this quantity? We've been switching elsewhere.
	aluebs-webrtc 2015/07/15 16:42:17 (in reply to ajm): It is unfortunate that we used "frames" to refer to blocks/chunks in other places, so I find the term "frames" really confusing. But now that we have a convention, I think we should stick with it for all new code. In short, I agree it is probably for the best to use num_frames_.
	mgraczyk 2015/07/15 20:03:20 (in reply to aluebs-webrtc): Done, although I think there are plenty of better names. "frames_per_chunk_", "frames_per_channel_", "chunk_size_frames_", or what we have now would all be more clear.
	468

	469 void set_sample_rate_hz(int value) {

	470 sample_rate_hz_ = value;

	471 samples_per_channel_ = calculate_samples_per_channel(value);

	472 }

	473 void set_num_channels(int value) { num_channels_ = value; }

	474 void set_has_keyboard(bool value) { has_keyboard_ = value; }

	475

	476 int sample_rate_hz() const { return sample_rate_hz_; }

	477 int num_channels() const { return num_channels_; }

	478 bool has_keyboard() const { return has_keyboard_; }

	479 int samples_per_channel() const { return samples_per_channel_; }

	480

	481 bool operator==(const StreamConfig& other) const {

	482 return sample_rate_hz_ == other.sample_rate_hz_ &&

	483 num_channels_ == other.num_channels_ &&

	484 has_keyboard_ == other.has_keyboard_;

	485 }

	486

	487 bool operator!=(const StreamConfig& other) const { return !(*this == other); }

	488

	489 private:

	490 static int calculate_samples_per_channel(int sample_rate_hz) {

	491 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;

	492 }

	493

	494 int sample_rate_hz_;

	495 int num_channels_;

	496 bool has_keyboard_;

	497 int samples_per_channel_;

	498 };

	499

	500 class ProcessingConfig {

	501 public:

	502 enum StreamName {

	503 kInputStream,

	504 kOutputStream,

	505 kReverseStream,

	506 kNumStreamNames,

	507 };

	508

	509 const StreamConfig& input_stream() const {

	510 return streams[StreamName::kInputStream];

	511 }

	512 const StreamConfig& output_stream() const {

	513 return streams[StreamName::kOutputStream];

	514 }

	515 const StreamConfig& reverse_stream() const {

	516 return streams[StreamName::kReverseStream];

	517 }

	518

	519 StreamConfig& input_stream() {

	520 return streams[StreamName::kInputStream];

	521 }

	522 StreamConfig& output_stream() {

	523 return streams[StreamName::kOutputStream];

	524 }

	525 StreamConfig& reverse_stream() {

	526 return streams[StreamName::kReverseStream];

	527 }

	528

	529 bool operator==(const ProcessingConfig& other) const {

	530 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {

	531 if (this->streams[i] != other.streams[i]) {

	532 return false;

	533 }

	534 }

	535 return true;

	536 }

	537

	538 bool operator!=(const ProcessingConfig& other) const {

	539 return !(*this == other);

	540 }

	541

	542 StreamConfig streams[StreamName::kNumStreamNames];

	543 };

	544

435 // The acoustic echo cancellation (AEC) component provides better performance	545 // The acoustic echo cancellation (AEC) component provides better performance

436 // than AECM but also requires more processing power and is dependent on delay	546 // than AECM but also requires more processing power and is dependent on delay

437 // stability and reporting accuracy. As such it is well-suited and recommended	547 // stability and reporting accuracy. As such it is well-suited and recommended

438 // for PC and IP phone applications.	548 // for PC and IP phone applications.

439 //	549 //

440 // Not recommended to be enabled on the server-side.	550 // Not recommended to be enabled on the server-side.

441 class EchoCancellation {	551 class EchoCancellation {

442 public:	552 public:

443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously.	553 // EchoCancellation and EchoControlMobile may not be enabled simultaneously.

444 // Enabling one will disable the other.	554 // Enabling one will disable the other.

(...skipping 342 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
787 // This does not impact the size of frames passed to \|ProcessStream()\|.	897 // This does not impact the size of frames passed to \|ProcessStream()\|.

788 virtual int set_frame_size_ms(int size) = 0;	898 virtual int set_frame_size_ms(int size) = 0;

789 virtual int frame_size_ms() const = 0;	899 virtual int frame_size_ms() const = 0;

790	900

791 protected:	901 protected:

792 virtual ~VoiceDetection() {}	902 virtual ~VoiceDetection() {}

793 };	903 };

794 } // namespace webrtc	904 } // namespace webrtc

795	905

796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_	906 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

OLD	NEW