Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(192)

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1253573005: Revert of Allow more than 2 input channels in AudioProcessing. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 22
23 struct AecCore; 23 struct AecCore;
24 24
25 namespace webrtc { 25 namespace webrtc {
26 26
27 class AudioFrame; 27 class AudioFrame;
28 28
29 template<typename T> 29 template<typename T>
30 class Beamformer; 30 class Beamformer;
31 31
32 class StreamConfig;
33 class ProcessingConfig;
34
35 class EchoCancellation; 32 class EchoCancellation;
36 class EchoControlMobile; 33 class EchoControlMobile;
37 class GainControl; 34 class GainControl;
38 class HighPassFilter; 35 class HighPassFilter;
39 class LevelEstimator; 36 class LevelEstimator;
40 class NoiseSuppression; 37 class NoiseSuppression;
41 class VoiceDetection; 38 class VoiceDetection;
42 39
43 // Use to enable the extended filter mode in the AEC, along with robustness 40 // Use to enable the extended filter mode in the AEC, along with robustness
44 // measures around the reported system delays. It comes with a significant 41 // measures around the reported system delays. It comes with a significant
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
80 // [12, 255]. Here, 255 maps to 100%. 77 // [12, 255]. Here, 255 maps to 100%.
81 // 78 //
82 // Must be provided through AudioProcessing::Create(Config&). 79 // Must be provided through AudioProcessing::Create(Config&).
83 #if defined(WEBRTC_CHROMIUM_BUILD) 80 #if defined(WEBRTC_CHROMIUM_BUILD)
84 static const int kAgcStartupMinVolume = 85; 81 static const int kAgcStartupMinVolume = 85;
85 #else 82 #else
86 static const int kAgcStartupMinVolume = 0; 83 static const int kAgcStartupMinVolume = 0;
87 #endif // defined(WEBRTC_CHROMIUM_BUILD) 84 #endif // defined(WEBRTC_CHROMIUM_BUILD)
88 struct ExperimentalAgc { 85 struct ExperimentalAgc {
89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} 86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
90 explicit ExperimentalAgc(bool enabled) 87 ExperimentalAgc(bool enabled)
91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} 88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
92 ExperimentalAgc(bool enabled, int startup_min_volume) 89 ExperimentalAgc(bool enabled, int startup_min_volume)
93 : enabled(enabled), startup_min_volume(startup_min_volume) {} 90 : enabled(enabled), startup_min_volume(startup_min_volume) {}
94 bool enabled; 91 bool enabled;
95 int startup_min_volume; 92 int startup_min_volume;
96 }; 93 };
97 94
98 // Use to enable experimental noise suppression. It can be set in the 95 // Use to enable experimental noise suppression. It can be set in the
99 // constructor or using AudioProcessing::SetExtraOptions(). 96 // constructor or using AudioProcessing::SetExtraOptions().
100 struct ExperimentalNs { 97 struct ExperimentalNs {
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 // 192 //
196 // // Repeat render and capture processing for the duration of the call... 193 // // Repeat render and capture processing for the duration of the call...
197 // // Start a new call... 194 // // Start a new call...
198 // apm->Initialize(); 195 // apm->Initialize();
199 // 196 //
200 // // Close the application... 197 // // Close the application...
201 // delete apm; 198 // delete apm;
202 // 199 //
203 class AudioProcessing { 200 class AudioProcessing {
204 public: 201 public:
205 // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
206 enum ChannelLayout { 202 enum ChannelLayout {
207 kMono, 203 kMono,
208 // Left, right. 204 // Left, right.
209 kStereo, 205 kStereo,
210 // Mono, keyboard mic. 206 // Mono, keyboard mic.
211 kMonoAndKeyboard, 207 kMonoAndKeyboard,
212 // Left, right, keyboard mic. 208 // Left, right, keyboard mic.
213 kStereoAndKeyboard 209 kStereoAndKeyboard
214 }; 210 };
215 211
(...skipping 17 matching lines...) Expand all
233 // 229 //
234 // It is also not necessary to call if the audio parameters (sample 230 // It is also not necessary to call if the audio parameters (sample
235 // rate and number of channels) have changed. Passing updated parameters 231 // rate and number of channels) have changed. Passing updated parameters
236 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. 232 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
237 // If the parameters are known at init-time though, they may be provided. 233 // If the parameters are known at init-time though, they may be provided.
238 virtual int Initialize() = 0; 234 virtual int Initialize() = 0;
239 235
240 // The int16 interfaces require: 236 // The int16 interfaces require:
241 // - only |NativeRate|s be used 237 // - only |NativeRate|s be used
242 // - that the input, output and reverse rates must match 238 // - that the input, output and reverse rates must match
243 // - that |processing_config.output_stream()| matches 239 // - that |output_layout| matches |input_layout|
244 // |processing_config.input_stream()|.
245 // 240 //
246 // The float interfaces accept arbitrary rates and support differing input and 241 // The float interfaces accept arbitrary rates and support differing input
247 // output layouts, but the output must have either one channel or the same 242 // and output layouts, but the output may only remove channels, not add.
248 // number of channels as the input.
249 virtual int Initialize(const ProcessingConfig& processing_config) = 0;
250
251 // Initialize with unpacked parameters. See Initialize() above for details.
252 //
253 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
254 virtual int Initialize(int input_sample_rate_hz, 243 virtual int Initialize(int input_sample_rate_hz,
255 int output_sample_rate_hz, 244 int output_sample_rate_hz,
256 int reverse_sample_rate_hz, 245 int reverse_sample_rate_hz,
257 ChannelLayout input_layout, 246 ChannelLayout input_layout,
258 ChannelLayout output_layout, 247 ChannelLayout output_layout,
259 ChannelLayout reverse_layout) = 0; 248 ChannelLayout reverse_layout) = 0;
260 249
261 // Pass down additional options which don't have explicit setters. This 250 // Pass down additional options which don't have explicit setters. This
262 // ensures the options are applied immediately. 251 // ensures the options are applied immediately.
263 virtual void SetExtraOptions(const Config& config) = 0; 252 virtual void SetExtraOptions(const Config& config) = 0;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 285 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
297 // members of |frame| must be valid. If changed from the previous call to this 286 // members of |frame| must be valid. If changed from the previous call to this
298 // method, it will trigger an initialization. 287 // method, it will trigger an initialization.
299 virtual int ProcessStream(AudioFrame* frame) = 0; 288 virtual int ProcessStream(AudioFrame* frame) = 0;
300 289
301 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
302 // of |src| points to a channel buffer, arranged according to 291 // of |src| points to a channel buffer, arranged according to
303 // |input_layout|. At output, the channels will be arranged according to 292 // |input_layout|. At output, the channels will be arranged according to
304 // |output_layout| at |output_sample_rate_hz| in |dest|. 293 // |output_layout| at |output_sample_rate_hz| in |dest|.
305 // 294 //
306 // The output layout must have one channel or as many channels as the input. 295 // The output layout may only remove channels, not add. |src| and |dest|
307 // |src| and |dest| may use the same memory, if desired. 296 // may use the same memory, if desired.
308 //
309 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
310 virtual int ProcessStream(const float* const* src, 297 virtual int ProcessStream(const float* const* src,
311 int samples_per_channel, 298 int samples_per_channel,
312 int input_sample_rate_hz, 299 int input_sample_rate_hz,
313 ChannelLayout input_layout, 300 ChannelLayout input_layout,
314 int output_sample_rate_hz, 301 int output_sample_rate_hz,
315 ChannelLayout output_layout, 302 ChannelLayout output_layout,
316 float* const* dest) = 0; 303 float* const* dest) = 0;
317 304
318 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
319 // |src| points to a channel buffer, arranged according to |input_stream|. At
320 // output, the channels will be arranged according to |output_stream| in
321 // |dest|.
322 //
323 // The output must have one channel or as many channels as the input. |src|
324 // and |dest| may use the same memory, if desired.
325 virtual int ProcessStream(const float* const* src,
326 const StreamConfig& input_config,
327 const StreamConfig& output_config,
328 float* const* dest) = 0;
329
330 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame 305 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
331 // will not be modified. On the client-side, this is the far-end (or to be 306 // will not be modified. On the client-side, this is the far-end (or to be
332 // rendered) audio. 307 // rendered) audio.
333 // 308 //
334 // It is only necessary to provide this if echo processing is enabled, as the 309 // It is only necessary to provide this if echo processing is enabled, as the
335 // reverse stream forms the echo reference signal. It is recommended, but not 310 // reverse stream forms the echo reference signal. It is recommended, but not
336 // necessary, to provide if gain control is enabled. On the server-side this 311 // necessary, to provide if gain control is enabled. On the server-side this
337 // typically will not be used. If you're not sure what to pass in here, 312 // typically will not be used. If you're not sure what to pass in here,
338 // chances are you don't need to use it. 313 // chances are you don't need to use it.
339 // 314 //
340 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 315 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
341 // members of |frame| must be valid. |sample_rate_hz_| must correspond to 316 // members of |frame| must be valid. |sample_rate_hz_| must correspond to
342 // |input_sample_rate_hz()| 317 // |input_sample_rate_hz()|
343 // 318 //
344 // TODO(ajm): add const to input; requires an implementation fix. 319 // TODO(ajm): add const to input; requires an implementation fix.
345 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
346 321
347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
348 // of |data| points to a channel buffer, arranged according to |layout|. 323 // of |data| points to a channel buffer, arranged according to |layout|.
349 //
350 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
351 virtual int AnalyzeReverseStream(const float* const* data, 324 virtual int AnalyzeReverseStream(const float* const* data,
352 int samples_per_channel, 325 int samples_per_channel,
353 int sample_rate_hz, 326 int sample_rate_hz,
354 ChannelLayout layout) = 0; 327 ChannelLayout layout) = 0;
355 328
356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
357 // |data| points to a channel buffer, arranged according to |reverse_config|.
358 virtual int AnalyzeReverseStream(const float* const* data,
359 const StreamConfig& reverse_config) = 0;
360
361 // This must be called if and only if echo processing is enabled. 329 // This must be called if and only if echo processing is enabled.
362 // 330 //
363 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end 331 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
364 // frame and ProcessStream() receiving a near-end frame containing the 332 // frame and ProcessStream() receiving a near-end frame containing the
365 // corresponding echo. On the client-side this can be expressed as 333 // corresponding echo. On the client-side this can be expressed as
366 // delay = (t_render - t_analyze) + (t_process - t_capture) 334 // delay = (t_render - t_analyze) + (t_process - t_capture)
367 // where, 335 // where,
368 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and 336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
369 // t_render is the time the first sample of the same frame is rendered by 337 // t_render is the time the first sample of the same frame is rendered by
370 // the audio hardware. 338 // the audio hardware.
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
457 enum NativeRate { 425 enum NativeRate {
458 kSampleRate8kHz = 8000, 426 kSampleRate8kHz = 8000,
459 kSampleRate16kHz = 16000, 427 kSampleRate16kHz = 16000,
460 kSampleRate32kHz = 32000, 428 kSampleRate32kHz = 32000,
461 kSampleRate48kHz = 48000 429 kSampleRate48kHz = 48000
462 }; 430 };
463 431
464 static const int kChunkSizeMs = 10; 432 static const int kChunkSizeMs = 10;
465 }; 433 };
466 434
467 class StreamConfig {
468 public:
469 // sample_rate_hz: The sampling rate of the stream.
470 //
471 // num_channels: The number of audio channels in the stream, excluding the
472 // keyboard channel if it is present. When passing a
473 // StreamConfig with an array of arrays T*[N],
474 //
475 // N == {num_channels + 1 if has_keyboard
476 // {num_channels if !has_keyboard
477 //
478 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
479 // is true, the last channel in any corresponding list of
480 // channels is the keyboard channel.
481 StreamConfig(int sample_rate_hz = 0,
482 int num_channels = 0,
483 bool has_keyboard = false)
484 : sample_rate_hz_(sample_rate_hz),
485 num_channels_(num_channels),
486 has_keyboard_(has_keyboard),
487 num_frames_(calculate_frames(sample_rate_hz)) {}
488
489 void set_sample_rate_hz(int value) {
490 sample_rate_hz_ = value;
491 num_frames_ = calculate_frames(value);
492 }
493 void set_num_channels(int value) { num_channels_ = value; }
494 void set_has_keyboard(bool value) { has_keyboard_ = value; }
495
496 int sample_rate_hz() const { return sample_rate_hz_; }
497
498 // The number of channels in the stream, not including the keyboard channel if
499 // present.
500 int num_channels() const { return num_channels_; }
501
502 bool has_keyboard() const { return has_keyboard_; }
503 int num_frames() const { return num_frames_; }
504
505 bool operator==(const StreamConfig& other) const {
506 return sample_rate_hz_ == other.sample_rate_hz_ &&
507 num_channels_ == other.num_channels_ &&
508 has_keyboard_ == other.has_keyboard_;
509 }
510
511 bool operator!=(const StreamConfig& other) const { return !(*this == other); }
512
513 private:
514 static int calculate_frames(int sample_rate_hz) {
515 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
516 }
517
518 int sample_rate_hz_;
519 int num_channels_;
520 bool has_keyboard_;
521 int num_frames_;
522 };
523
524 class ProcessingConfig {
525 public:
526 enum StreamName {
527 kInputStream,
528 kOutputStream,
529 kReverseStream,
530 kNumStreamNames,
531 };
532
533 const StreamConfig& input_stream() const {
534 return streams[StreamName::kInputStream];
535 }
536 const StreamConfig& output_stream() const {
537 return streams[StreamName::kOutputStream];
538 }
539 const StreamConfig& reverse_stream() const {
540 return streams[StreamName::kReverseStream];
541 }
542
543 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
544 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
545 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
546
547 bool operator==(const ProcessingConfig& other) const {
548 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
549 if (this->streams[i] != other.streams[i]) {
550 return false;
551 }
552 }
553 return true;
554 }
555
556 bool operator!=(const ProcessingConfig& other) const {
557 return !(*this == other);
558 }
559
560 StreamConfig streams[StreamName::kNumStreamNames];
561 };
562
563 // The acoustic echo cancellation (AEC) component provides better performance 435 // The acoustic echo cancellation (AEC) component provides better performance
564 // than AECM but also requires more processing power and is dependent on delay 436 // than AECM but also requires more processing power and is dependent on delay
565 // stability and reporting accuracy. As such it is well-suited and recommended 437 // stability and reporting accuracy. As such it is well-suited and recommended
566 // for PC and IP phone applications. 438 // for PC and IP phone applications.
567 // 439 //
568 // Not recommended to be enabled on the server-side. 440 // Not recommended to be enabled on the server-side.
569 class EchoCancellation { 441 class EchoCancellation {
570 public: 442 public:
571 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. 443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously.
572 // Enabling one will disable the other. 444 // Enabling one will disable the other.
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after
915 // This does not impact the size of frames passed to |ProcessStream()|. 787 // This does not impact the size of frames passed to |ProcessStream()|.
916 virtual int set_frame_size_ms(int size) = 0; 788 virtual int set_frame_size_ms(int size) = 0;
917 virtual int frame_size_ms() const = 0; 789 virtual int frame_size_ms() const = 0;
918 790
919 protected: 791 protected:
920 virtual ~VoiceDetection() {} 792 virtual ~VoiceDetection() {}
921 }; 793 };
922 } // namespace webrtc 794 } // namespace webrtc
923 795
924 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698