OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... |
22 | 22 |
23 struct AecCore; | 23 struct AecCore; |
24 | 24 |
25 namespace webrtc { | 25 namespace webrtc { |
26 | 26 |
27 class AudioFrame; | 27 class AudioFrame; |
28 | 28 |
29 template<typename T> | 29 template<typename T> |
30 class Beamformer; | 30 class Beamformer; |
31 | 31 |
32 class StreamConfig; | |
33 class ProcessingConfig; | |
34 | |
35 class EchoCancellation; | 32 class EchoCancellation; |
36 class EchoControlMobile; | 33 class EchoControlMobile; |
37 class GainControl; | 34 class GainControl; |
38 class HighPassFilter; | 35 class HighPassFilter; |
39 class LevelEstimator; | 36 class LevelEstimator; |
40 class NoiseSuppression; | 37 class NoiseSuppression; |
41 class VoiceDetection; | 38 class VoiceDetection; |
42 | 39 |
43 // Use to enable the extended filter mode in the AEC, along with robustness | 40 // Use to enable the extended filter mode in the AEC, along with robustness |
44 // measures around the reported system delays. It comes with a significant | 41 // measures around the reported system delays. It comes with a significant |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
80 // [12, 255]. Here, 255 maps to 100%. | 77 // [12, 255]. Here, 255 maps to 100%. |
81 // | 78 // |
82 // Must be provided through AudioProcessing::Create(Config&). | 79 // Must be provided through AudioProcessing::Create(Config&). |
83 #if defined(WEBRTC_CHROMIUM_BUILD) | 80 #if defined(WEBRTC_CHROMIUM_BUILD) |
84 static const int kAgcStartupMinVolume = 85; | 81 static const int kAgcStartupMinVolume = 85; |
85 #else | 82 #else |
86 static const int kAgcStartupMinVolume = 0; | 83 static const int kAgcStartupMinVolume = 0; |
87 #endif // defined(WEBRTC_CHROMIUM_BUILD) | 84 #endif // defined(WEBRTC_CHROMIUM_BUILD) |
88 struct ExperimentalAgc { | 85 struct ExperimentalAgc { |
89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} | 86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} |
90 explicit ExperimentalAgc(bool enabled) | 87 ExperimentalAgc(bool enabled) |
91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} | 88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} |
92 ExperimentalAgc(bool enabled, int startup_min_volume) | 89 ExperimentalAgc(bool enabled, int startup_min_volume) |
93 : enabled(enabled), startup_min_volume(startup_min_volume) {} | 90 : enabled(enabled), startup_min_volume(startup_min_volume) {} |
94 bool enabled; | 91 bool enabled; |
95 int startup_min_volume; | 92 int startup_min_volume; |
96 }; | 93 }; |
97 | 94 |
98 // Use to enable experimental noise suppression. It can be set in the | 95 // Use to enable experimental noise suppression. It can be set in the |
99 // constructor or using AudioProcessing::SetExtraOptions(). | 96 // constructor or using AudioProcessing::SetExtraOptions(). |
100 struct ExperimentalNs { | 97 struct ExperimentalNs { |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
195 // | 192 // |
196 // // Repeat render and capture processing for the duration of the call... | 193 // // Repeat render and capture processing for the duration of the call... |
197 // // Start a new call... | 194 // // Start a new call... |
198 // apm->Initialize(); | 195 // apm->Initialize(); |
199 // | 196 // |
200 // // Close the application... | 197 // // Close the application... |
201 // delete apm; | 198 // delete apm; |
202 // | 199 // |
203 class AudioProcessing { | 200 class AudioProcessing { |
204 public: | 201 public: |
205 // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone. | |
206 enum ChannelLayout { | 202 enum ChannelLayout { |
207 kMono, | 203 kMono, |
208 // Left, right. | 204 // Left, right. |
209 kStereo, | 205 kStereo, |
210 // Mono, keyboard mic. | 206 // Mono, keyboard mic. |
211 kMonoAndKeyboard, | 207 kMonoAndKeyboard, |
212 // Left, right, keyboard mic. | 208 // Left, right, keyboard mic. |
213 kStereoAndKeyboard | 209 kStereoAndKeyboard |
214 }; | 210 }; |
215 | 211 |
(...skipping 17 matching lines...) Expand all Loading... |
233 // | 229 // |
234 // It is also not necessary to call if the audio parameters (sample | 230 // It is also not necessary to call if the audio parameters (sample |
235 // rate and number of channels) have changed. Passing updated parameters | 231 // rate and number of channels) have changed. Passing updated parameters |
236 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. | 232 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. |
237 // If the parameters are known at init-time though, they may be provided. | 233 // If the parameters are known at init-time though, they may be provided. |
238 virtual int Initialize() = 0; | 234 virtual int Initialize() = 0; |
239 | 235 |
240 // The int16 interfaces require: | 236 // The int16 interfaces require: |
241 // - only |NativeRate|s be used | 237 // - only |NativeRate|s be used |
242 // - that the input, output and reverse rates must match | 238 // - that the input, output and reverse rates must match |
243 // - that |processing_config.output_stream()| matches | 239 // - that |output_layout| matches |input_layout| |
244 // |processing_config.input_stream()|. | |
245 // | 240 // |
246 // The float interfaces accept arbitrary rates and support differing input and | 241 // The float interfaces accept arbitrary rates and support differing input |
247 // output layouts, but the output must have either one channel or the same | 242 // and output layouts, but the output may only remove channels, not add. |
248 // number of channels as the input. | |
249 virtual int Initialize(const ProcessingConfig& processing_config) = 0; | |
250 | |
251 // Initialize with unpacked parameters. See Initialize() above for details. | |
252 // | |
253 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | |
254 virtual int Initialize(int input_sample_rate_hz, | 243 virtual int Initialize(int input_sample_rate_hz, |
255 int output_sample_rate_hz, | 244 int output_sample_rate_hz, |
256 int reverse_sample_rate_hz, | 245 int reverse_sample_rate_hz, |
257 ChannelLayout input_layout, | 246 ChannelLayout input_layout, |
258 ChannelLayout output_layout, | 247 ChannelLayout output_layout, |
259 ChannelLayout reverse_layout) = 0; | 248 ChannelLayout reverse_layout) = 0; |
260 | 249 |
261 // Pass down additional options which don't have explicit setters. This | 250 // Pass down additional options which don't have explicit setters. This |
262 // ensures the options are applied immediately. | 251 // ensures the options are applied immediately. |
263 virtual void SetExtraOptions(const Config& config) = 0; | 252 virtual void SetExtraOptions(const Config& config) = 0; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
296 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 285 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
297 // members of |frame| must be valid. If changed from the previous call to this | 286 // members of |frame| must be valid. If changed from the previous call to this |
298 // method, it will trigger an initialization. | 287 // method, it will trigger an initialization. |
299 virtual int ProcessStream(AudioFrame* frame) = 0; | 288 virtual int ProcessStream(AudioFrame* frame) = 0; |
300 | 289 |
301 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
302 // of |src| points to a channel buffer, arranged according to | 291 // of |src| points to a channel buffer, arranged according to |
303 // |input_layout|. At output, the channels will be arranged according to | 292 // |input_layout|. At output, the channels will be arranged according to |
304 // |output_layout| at |output_sample_rate_hz| in |dest|. | 293 // |output_layout| at |output_sample_rate_hz| in |dest|. |
305 // | 294 // |
306 // The output layout must have one channel or as many channels as the input. | 295 // The output layout may only remove channels, not add. |src| and |dest| |
307 // |src| and |dest| may use the same memory, if desired. | 296 // may use the same memory, if desired. |
308 // | |
309 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | |
310 virtual int ProcessStream(const float* const* src, | 297 virtual int ProcessStream(const float* const* src, |
311 int samples_per_channel, | 298 int samples_per_channel, |
312 int input_sample_rate_hz, | 299 int input_sample_rate_hz, |
313 ChannelLayout input_layout, | 300 ChannelLayout input_layout, |
314 int output_sample_rate_hz, | 301 int output_sample_rate_hz, |
315 ChannelLayout output_layout, | 302 ChannelLayout output_layout, |
316 float* const* dest) = 0; | 303 float* const* dest) = 0; |
317 | 304 |
318 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | |
319 // |src| points to a channel buffer, arranged according to |input_config|. At | |
320 // output, the channels will be arranged according to |output_config| in | |
321 // |dest|. | |
322 // | |
323 // The output must have one channel or as many channels as the input. |src| | |
324 // and |dest| may use the same memory, if desired. | |
325 virtual int ProcessStream(const float* const* src, | |
326 const StreamConfig& input_config, | |
327 const StreamConfig& output_config, | |
328 float* const* dest) = 0; | |
329 | |
330 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame | 305 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame |
331 // will not be modified. On the client-side, this is the far-end (or to be | 306 // will not be modified. On the client-side, this is the far-end (or to be |
332 // rendered) audio. | 307 // rendered) audio. |
333 // | 308 // |
334 // It is only necessary to provide this if echo processing is enabled, as the | 309 // It is only necessary to provide this if echo processing is enabled, as the |
335 // reverse stream forms the echo reference signal. It is recommended, but not | 310 // reverse stream forms the echo reference signal. It is recommended, but not |
336 // necessary, to provide if gain control is enabled. On the server-side this | 311 // necessary, to provide if gain control is enabled. On the server-side this |
337 // typically will not be used. If you're not sure what to pass in here, | 312 // typically will not be used. If you're not sure what to pass in here, |
338 // chances are you don't need to use it. | 313 // chances are you don't need to use it. |
339 // | 314 // |
340 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 315 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
341 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 316 // members of |frame| must be valid. |sample_rate_hz_| must correspond to |
342 // |input_sample_rate_hz()| | 317 // |input_sample_rate_hz()| |
343 // | 318 // |
344 // TODO(ajm): add const to input; requires an implementation fix. | 319 // TODO(ajm): add const to input; requires an implementation fix. |
345 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; |
346 | 321 |
347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
348 // of |data| points to a channel buffer, arranged according to |layout|. | 323 // of |data| points to a channel buffer, arranged according to |layout|. |
349 // | |
350 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | |
351 virtual int AnalyzeReverseStream(const float* const* data, | 324 virtual int AnalyzeReverseStream(const float* const* data, |
352 int samples_per_channel, | 325 int samples_per_channel, |
353 int sample_rate_hz, | 326 int sample_rate_hz, |
354 ChannelLayout layout) = 0; | 327 ChannelLayout layout) = 0; |
355 | 328 |
356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | |
357 // |data| points to a channel buffer, arranged according to |reverse_config|. | |
358 virtual int AnalyzeReverseStream(const float* const* data, | |
359 const StreamConfig& reverse_config) = 0; | |
360 | |
361 // This must be called if and only if echo processing is enabled. | 329 // This must be called if and only if echo processing is enabled. |
362 // | 330 // |
363 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 331 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end |
364 // frame and ProcessStream() receiving a near-end frame containing the | 332 // frame and ProcessStream() receiving a near-end frame containing the |
365 // corresponding echo. On the client-side this can be expressed as | 333 // corresponding echo. On the client-side this can be expressed as |
366 // delay = (t_render - t_analyze) + (t_process - t_capture) | 334 // delay = (t_render - t_analyze) + (t_process - t_capture) |
367 // where, | 335 // where, |
368 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and |
369 // t_render is the time the first sample of the same frame is rendered by | 337 // t_render is the time the first sample of the same frame is rendered by |
370 // the audio hardware. | 338 // the audio hardware. |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
457 enum NativeRate { | 425 enum NativeRate { |
458 kSampleRate8kHz = 8000, | 426 kSampleRate8kHz = 8000, |
459 kSampleRate16kHz = 16000, | 427 kSampleRate16kHz = 16000, |
460 kSampleRate32kHz = 32000, | 428 kSampleRate32kHz = 32000, |
461 kSampleRate48kHz = 48000 | 429 kSampleRate48kHz = 48000 |
462 }; | 430 }; |
463 | 431 |
464 static const int kChunkSizeMs = 10; | 432 static const int kChunkSizeMs = 10; |
465 }; | 433 }; |
466 | 434 |
467 class StreamConfig { | |
468 public: | |
469 // sample_rate_hz: The sampling rate of the stream. | |
470 // | |
471 // num_channels: The number of audio channels in the stream, excluding the | |
472 // keyboard channel if it is present. When passing a | |
473 // StreamConfig with an array of arrays T*[N], | |
474 // | |
475 // N == {num_channels + 1 if has_keyboard | |
476 // {num_channels if !has_keyboard | |
477 // | |
478 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard | |
479 // is true, the last channel in any corresponding list of | |
480 // channels is the keyboard channel. | |
481 StreamConfig(int sample_rate_hz = 0, | |
482 int num_channels = 0, | |
483 bool has_keyboard = false) | |
484 : sample_rate_hz_(sample_rate_hz), | |
485 num_channels_(num_channels), | |
486 has_keyboard_(has_keyboard), | |
487 num_frames_(calculate_frames(sample_rate_hz)) {} | |
488 | |
489 void set_sample_rate_hz(int value) { | |
490 sample_rate_hz_ = value; | |
491 num_frames_ = calculate_frames(value); | |
492 } | |
493 void set_num_channels(int value) { num_channels_ = value; } | |
494 void set_has_keyboard(bool value) { has_keyboard_ = value; } | |
495 | |
496 int sample_rate_hz() const { return sample_rate_hz_; } | |
497 | |
498 // The number of channels in the stream, not including the keyboard channel if | |
499 // present. | |
500 int num_channels() const { return num_channels_; } | |
501 | |
502 bool has_keyboard() const { return has_keyboard_; } | |
503 int num_frames() const { return num_frames_; } | |
504 | |
505 bool operator==(const StreamConfig& other) const { | |
506 return sample_rate_hz_ == other.sample_rate_hz_ && | |
507 num_channels_ == other.num_channels_ && | |
508 has_keyboard_ == other.has_keyboard_; | |
509 } | |
510 | |
511 bool operator!=(const StreamConfig& other) const { return !(*this == other); } | |
512 | |
513 private: | |
514 static int calculate_frames(int sample_rate_hz) { | |
515 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; | |
516 } | |
517 | |
518 int sample_rate_hz_; | |
519 int num_channels_; | |
520 bool has_keyboard_; | |
521 int num_frames_; | |
522 }; | |
523 | |
524 class ProcessingConfig { | |
525 public: | |
526 enum StreamName { | |
527 kInputStream, | |
528 kOutputStream, | |
529 kReverseStream, | |
530 kNumStreamNames, | |
531 }; | |
532 | |
533 const StreamConfig& input_stream() const { | |
534 return streams[StreamName::kInputStream]; | |
535 } | |
536 const StreamConfig& output_stream() const { | |
537 return streams[StreamName::kOutputStream]; | |
538 } | |
539 const StreamConfig& reverse_stream() const { | |
540 return streams[StreamName::kReverseStream]; | |
541 } | |
542 | |
543 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } | |
544 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } | |
545 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } | |
546 | |
547 bool operator==(const ProcessingConfig& other) const { | |
548 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { | |
549 if (this->streams[i] != other.streams[i]) { | |
550 return false; | |
551 } | |
552 } | |
553 return true; | |
554 } | |
555 | |
556 bool operator!=(const ProcessingConfig& other) const { | |
557 return !(*this == other); | |
558 } | |
559 | |
560 StreamConfig streams[StreamName::kNumStreamNames]; | |
561 }; | |
562 | |
563 // The acoustic echo cancellation (AEC) component provides better performance | 435 // The acoustic echo cancellation (AEC) component provides better performance |
564 // than AECM but also requires more processing power and is dependent on delay | 436 // than AECM but also requires more processing power and is dependent on delay |
565 // stability and reporting accuracy. As such it is well-suited and recommended | 437 // stability and reporting accuracy. As such it is well-suited and recommended |
566 // for PC and IP phone applications. | 438 // for PC and IP phone applications. |
567 // | 439 // |
568 // Not recommended to be enabled on the server-side. | 440 // Not recommended to be enabled on the server-side. |
569 class EchoCancellation { | 441 class EchoCancellation { |
570 public: | 442 public: |
571 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. | 443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. |
572 // Enabling one will disable the other. | 444 // Enabling one will disable the other. |
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
915 // This does not impact the size of frames passed to |ProcessStream()|. | 787 // This does not impact the size of frames passed to |ProcessStream()|. |
916 virtual int set_frame_size_ms(int size) = 0; | 788 virtual int set_frame_size_ms(int size) = 0; |
917 virtual int frame_size_ms() const = 0; | 789 virtual int frame_size_ms() const = 0; |
918 | 790 |
919 protected: | 791 protected: |
920 virtual ~VoiceDetection() {} | 792 virtual ~VoiceDetection() {} |
921 }; | 793 }; |
922 } // namespace webrtc | 794 } // namespace webrtc |
923 | 795 |
924 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ |
OLD | NEW |