Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1226093007: Allow more than 2 input channels in AudioProcessing. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Address Comments Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 22
23 struct AecCore; 23 struct AecCore;
24 24
25 namespace webrtc { 25 namespace webrtc {
26 26
27 class AudioFrame; 27 class AudioFrame;
28 28
29 template<typename T> 29 template<typename T>
30 class Beamformer; 30 class Beamformer;
31 31
32 class StreamConfig;
33 class ProcessingConfig;
34
32 class EchoCancellation; 35 class EchoCancellation;
33 class EchoControlMobile; 36 class EchoControlMobile;
34 class GainControl; 37 class GainControl;
35 class HighPassFilter; 38 class HighPassFilter;
36 class LevelEstimator; 39 class LevelEstimator;
37 class NoiseSuppression; 40 class NoiseSuppression;
38 class VoiceDetection; 41 class VoiceDetection;
39 42
40 // Use to enable the extended filter mode in the AEC, along with robustness 43 // Use to enable the extended filter mode in the AEC, along with robustness
41 // measures around the reported system delays. It comes with a significant 44 // measures around the reported system delays. It comes with a significant
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 // [12, 255]. Here, 255 maps to 100%. 80 // [12, 255]. Here, 255 maps to 100%.
78 // 81 //
79 // Must be provided through AudioProcessing::Create(Config&). 82 // Must be provided through AudioProcessing::Create(Config&).
80 #if defined(WEBRTC_CHROMIUM_BUILD) 83 #if defined(WEBRTC_CHROMIUM_BUILD)
81 static const int kAgcStartupMinVolume = 85; 84 static const int kAgcStartupMinVolume = 85;
82 #else 85 #else
83 static const int kAgcStartupMinVolume = 0; 86 static const int kAgcStartupMinVolume = 0;
84 #endif // defined(WEBRTC_CHROMIUM_BUILD) 87 #endif // defined(WEBRTC_CHROMIUM_BUILD)
85 struct ExperimentalAgc { 88 struct ExperimentalAgc {
86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} 89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
87 ExperimentalAgc(bool enabled) 90 explicit ExperimentalAgc(bool enabled)
88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} 91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
89 ExperimentalAgc(bool enabled, int startup_min_volume) 92 ExperimentalAgc(bool enabled, int startup_min_volume)
90 : enabled(enabled), startup_min_volume(startup_min_volume) {} 93 : enabled(enabled), startup_min_volume(startup_min_volume) {}
91 bool enabled; 94 bool enabled;
92 int startup_min_volume; 95 int startup_min_volume;
93 }; 96 };
94 97
95 // Use to enable experimental noise suppression. It can be set in the 98 // Use to enable experimental noise suppression. It can be set in the
96 // constructor or using AudioProcessing::SetExtraOptions(). 99 // constructor or using AudioProcessing::SetExtraOptions().
97 struct ExperimentalNs { 100 struct ExperimentalNs {
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 // 232 //
230 // It is also not necessary to call if the audio parameters (sample 233 // It is also not necessary to call if the audio parameters (sample
231 // rate and number of channels) have changed. Passing updated parameters 234 // rate and number of channels) have changed. Passing updated parameters
232 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. 235 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
233 // If the parameters are known at init-time though, they may be provided. 236 // If the parameters are known at init-time though, they may be provided.
234 virtual int Initialize() = 0; 237 virtual int Initialize() = 0;
235 238
236 // The int16 interfaces require: 239 // The int16 interfaces require:
237 // - only |NativeRate|s be used 240 // - only |NativeRate|s be used
238 // - that the input, output and reverse rates must match 241 // - that the input, output and reverse rates must match
239 // - that |output_layout| matches |input_layout| 242 // - that |processing_config.output_stream()| matches
243 // |processing_config.input_stream()|.
240 // 244 //
241 // The float interfaces accept arbitrary rates and support differing input 245 // The float interfaces accept arbitrary rates and support differing input and
242 // and output layouts, but the output may only remove channels, not add. 246 // output layouts, but the output must have either one channel or the same
247 // number of channels as the input.
248 virtual int Initialize(const ProcessingConfig& processing_config) = 0;
249
250 // Initialize with unpacked parameters. See Initialize() above for details.
243 virtual int Initialize(int input_sample_rate_hz, 251 virtual int Initialize(int input_sample_rate_hz,
244 int output_sample_rate_hz, 252 int output_sample_rate_hz,
245 int reverse_sample_rate_hz, 253 int reverse_sample_rate_hz,
246 ChannelLayout input_layout, 254 ChannelLayout input_layout,
247 ChannelLayout output_layout, 255 ChannelLayout output_layout,
248 ChannelLayout reverse_layout) = 0; 256 ChannelLayout reverse_layout) = 0;
249 257
250 // Pass down additional options which don't have explicit setters. This 258 // Pass down additional options which don't have explicit setters. This
251 // ensures the options are applied immediately. 259 // ensures the options are applied immediately.
252 virtual void SetExtraOptions(const Config& config) = 0; 260 virtual void SetExtraOptions(const Config& config) = 0;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
285 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 293 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
286 // members of |frame| must be valid. If changed from the previous call to this 294 // members of |frame| must be valid. If changed from the previous call to this
287 // method, it will trigger an initialization. 295 // method, it will trigger an initialization.
288 virtual int ProcessStream(AudioFrame* frame) = 0; 296 virtual int ProcessStream(AudioFrame* frame) = 0;
289 297
290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 298 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
291 // of |src| points to a channel buffer, arranged according to 299 // of |src| points to a channel buffer, arranged according to
292 // |input_layout|. At output, the channels will be arranged according to 300 // |input_layout|. At output, the channels will be arranged according to
293 // |output_layout| at |output_sample_rate_hz| in |dest|. 301 // |output_layout| at |output_sample_rate_hz| in |dest|.
294 // 302 //
295 // The output layout may only remove channels, not add. |src| and |dest| 303 // The output layout must have one channel or as many channels as the input.
296 // may use the same memory, if desired. 304 // |src| and |dest| may use the same memory, if desired.
297 virtual int ProcessStream(const float* const* src, 305 virtual int ProcessStream(const float* const* src,
ajm 2015/07/15 05:21:20 Can you add a TODO here and on the deprecated Anal[...]
mgraczyk 2015/07/15 20:03:20 Done.
298 int samples_per_channel, 306 int samples_per_channel,
299 int input_sample_rate_hz, 307 int input_sample_rate_hz,
300 ChannelLayout input_layout, 308 ChannelLayout input_layout,
301 int output_sample_rate_hz, 309 int output_sample_rate_hz,
302 ChannelLayout output_layout, 310 ChannelLayout output_layout,
303 float* const* dest) = 0; 311 float* const* dest) = 0;
304 312
313 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
314 // |src| points to a channel buffer, arranged according to
315 // |processing_config.input_stream()|. At output, the channels will be
316 // arranged according to |processing_config.output_stream()| in |dest|.
317 //
318 // The output must have one channel or as many channels as the input. |src|
319 // and |dest| may use the same memory, if desired.
320 virtual int ProcessStream(const float* const* src,
321 const ProcessingConfig& processing_config,
322 float* const* dest) = 0;
323
305 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame 324 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
306 // will not be modified. On the client-side, this is the far-end (or to be 325 // will not be modified. On the client-side, this is the far-end (or to be
307 // rendered) audio. 326 // rendered) audio.
308 // 327 //
309 // It is only necessary to provide this if echo processing is enabled, as the 328 // It is only necessary to provide this if echo processing is enabled, as the
310 // reverse stream forms the echo reference signal. It is recommended, but not 329 // reverse stream forms the echo reference signal. It is recommended, but not
311 // necessary, to provide if gain control is enabled. On the server-side this 330 // necessary, to provide if gain control is enabled. On the server-side this
312 // typically will not be used. If you're not sure what to pass in here, 331 // typically will not be used. If you're not sure what to pass in here,
313 // chances are you don't need to use it. 332 // chances are you don't need to use it.
314 // 333 //
315 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 334 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
316 // members of |frame| must be valid. |sample_rate_hz_| must correspond to 335 // members of |frame| must be valid. |sample_rate_hz_| must correspond to
317 // |input_sample_rate_hz()| 336 // |input_sample_rate_hz()|
318 // 337 //
319 // TODO(ajm): add const to input; requires an implementation fix. 338 // TODO(ajm): add const to input; requires an implementation fix.
320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 339 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
321 340
322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 341 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
323 // of |data| points to a channel buffer, arranged according to |layout|. 342 // of |data| points to a channel buffer, arranged according to |layout|.
324 virtual int AnalyzeReverseStream(const float* const* data, 343 virtual int AnalyzeReverseStream(const float* const* data,
325 int samples_per_channel, 344 int samples_per_channel,
326 int sample_rate_hz, 345 int sample_rate_hz,
327 ChannelLayout layout) = 0; 346 ChannelLayout layout) = 0;
328 347
348 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
349 // |data| points to a channel buffer, arranged according to |reverse_config|.
350 virtual int AnalyzeReverseStream(const float* const* data,
351 const StreamConfig& reverse_config) = 0;
352
329 // This must be called if and only if echo processing is enabled. 353 // This must be called if and only if echo processing is enabled.
330 // 354 //
331 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end 355 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
332 // frame and ProcessStream() receiving a near-end frame containing the 356 // frame and ProcessStream() receiving a near-end frame containing the
333 // corresponding echo. On the client-side this can be expressed as 357 // corresponding echo. On the client-side this can be expressed as
334 // delay = (t_render - t_analyze) + (t_process - t_capture) 358 // delay = (t_render - t_analyze) + (t_process - t_capture)
335 // where, 359 // where,
336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and 360 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
337 // t_render is the time the first sample of the same frame is rendered by 361 // t_render is the time the first sample of the same frame is rendered by
338 // the audio hardware. 362 // the audio hardware.
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
425 enum NativeRate { 449 enum NativeRate {
426 kSampleRate8kHz = 8000, 450 kSampleRate8kHz = 8000,
427 kSampleRate16kHz = 16000, 451 kSampleRate16kHz = 16000,
428 kSampleRate32kHz = 32000, 452 kSampleRate32kHz = 32000,
429 kSampleRate48kHz = 48000 453 kSampleRate48kHz = 48000
430 }; 454 };
431 455
432 static const int kChunkSizeMs = 10; 456 static const int kChunkSizeMs = 10;
433 }; 457 };
434 458
459 class StreamConfig {
460 public:
461 StreamConfig(int sample_rate_hz = 0,
462 int num_channels = 0,
463 bool has_keyboard = false)
464 : sample_rate_hz_(sample_rate_hz),
465 num_channels_(num_channels),
466 has_keyboard_(has_keyboard),
467 samples_per_channel_(calculate_samples_per_channel(sample_rate_hz)) {}
ajm 2015/07/15 05:21:20 Alex, Michael: I know it breaks convention in this [...]
aluebs-webrtc 2015/07/15 16:42:17 It is unfortunate that we used "frames" to refer t[...]
mgraczyk 2015/07/15 20:03:20 Done, although I think there are plenty of better [...]
468
469 void set_sample_rate_hz(int value) {
470 sample_rate_hz_ = value;
471 samples_per_channel_ = calculate_samples_per_channel(value);
472 }
473 void set_num_channels(int value) { num_channels_ = value; }
474 void set_has_keyboard(bool value) { has_keyboard_ = value; }
475
476 int sample_rate_hz() const { return sample_rate_hz_; }
477 int num_channels() const { return num_channels_; }
478 bool has_keyboard() const { return has_keyboard_; }
479 int samples_per_channel() const { return samples_per_channel_; }
480
481 bool operator==(const StreamConfig& other) const {
482 return sample_rate_hz_ == other.sample_rate_hz_ &&
483 num_channels_ == other.num_channels_ &&
484 has_keyboard_ == other.has_keyboard_;
485 }
486
487 bool operator!=(const StreamConfig& other) const { return !(*this == other); }
488
489 private:
490 static int calculate_samples_per_channel(int sample_rate_hz) {
491 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
492 }
493
494 int sample_rate_hz_;
495 int num_channels_;
496 bool has_keyboard_;
497 int samples_per_channel_;
498 };
499
500 class ProcessingConfig {
501 public:
502 enum StreamName {
503 kInputStream,
504 kOutputStream,
505 kReverseStream,
506 kNumStreamNames,
507 };
508
509 const StreamConfig& input_stream() const {
510 return streams[StreamName::kInputStream];
511 }
512 const StreamConfig& output_stream() const {
513 return streams[StreamName::kOutputStream];
514 }
515 const StreamConfig& reverse_stream() const {
516 return streams[StreamName::kReverseStream];
517 }
518
519 StreamConfig& input_stream() {
520 return streams[StreamName::kInputStream];
521 }
522 StreamConfig& output_stream() {
523 return streams[StreamName::kOutputStream];
524 }
525 StreamConfig& reverse_stream() {
526 return streams[StreamName::kReverseStream];
527 }
528
529 bool operator==(const ProcessingConfig& other) const {
530 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
531 if (this->streams[i] != other.streams[i]) {
532 return false;
533 }
534 }
535 return true;
536 }
537
538 bool operator!=(const ProcessingConfig& other) const {
539 return !(*this == other);
540 }
541
542 StreamConfig streams[StreamName::kNumStreamNames];
543 };
544
435 // The acoustic echo cancellation (AEC) component provides better performance 545 // The acoustic echo cancellation (AEC) component provides better performance
436 // than AECM but also requires more processing power and is dependent on delay 546 // than AECM but also requires more processing power and is dependent on delay
437 // stability and reporting accuracy. As such it is well-suited and recommended 547 // stability and reporting accuracy. As such it is well-suited and recommended
438 // for PC and IP phone applications. 548 // for PC and IP phone applications.
439 // 549 //
440 // Not recommended to be enabled on the server-side. 550 // Not recommended to be enabled on the server-side.
441 class EchoCancellation { 551 class EchoCancellation {
442 public: 552 public:
443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. 553 // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
444 // Enabling one will disable the other. 554 // Enabling one will disable the other.
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after
787 // This does not impact the size of frames passed to |ProcessStream()|. 897 // This does not impact the size of frames passed to |ProcessStream()|.
788 virtual int set_frame_size_ms(int size) = 0; 898 virtual int set_frame_size_ms(int size) = 0;
789 virtual int frame_size_ms() const = 0; 899 virtual int frame_size_ms() const = 0;
790 900
791 protected: 901 protected:
792 virtual ~VoiceDetection() {} 902 virtual ~VoiceDetection() {}
793 }; 903 };
794 } // namespace webrtc 904 } // namespace webrtc
795 905
796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 906 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698