OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... |
22 | 22 |
23 struct AecCore; | 23 struct AecCore; |
24 | 24 |
25 namespace webrtc { | 25 namespace webrtc { |
26 | 26 |
27 class AudioFrame; | 27 class AudioFrame; |
28 | 28 |
29 template<typename T> | 29 template<typename T> |
30 class Beamformer; | 30 class Beamformer; |
31 | 31 |
| 32 class StreamConfig; |
| 33 class ProcessingConfig; |
| 34 |
32 class EchoCancellation; | 35 class EchoCancellation; |
33 class EchoControlMobile; | 36 class EchoControlMobile; |
34 class GainControl; | 37 class GainControl; |
35 class HighPassFilter; | 38 class HighPassFilter; |
36 class LevelEstimator; | 39 class LevelEstimator; |
37 class NoiseSuppression; | 40 class NoiseSuppression; |
38 class VoiceDetection; | 41 class VoiceDetection; |
39 | 42 |
40 // Use to enable the extended filter mode in the AEC, along with robustness | 43 // Use to enable the extended filter mode in the AEC, along with robustness |
41 // measures around the reported system delays. It comes with a significant | 44 // measures around the reported system delays. It comes with a significant |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 // [12, 255]. Here, 255 maps to 100%. | 80 // [12, 255]. Here, 255 maps to 100%. |
78 // | 81 // |
79 // Must be provided through AudioProcessing::Create(Config&). | 82 // Must be provided through AudioProcessing::Create(Config&). |
80 #if defined(WEBRTC_CHROMIUM_BUILD) | 83 #if defined(WEBRTC_CHROMIUM_BUILD) |
81 static const int kAgcStartupMinVolume = 85; | 84 static const int kAgcStartupMinVolume = 85; |
82 #else | 85 #else |
83 static const int kAgcStartupMinVolume = 0; | 86 static const int kAgcStartupMinVolume = 0; |
84 #endif // defined(WEBRTC_CHROMIUM_BUILD) | 87 #endif // defined(WEBRTC_CHROMIUM_BUILD) |
85 struct ExperimentalAgc { | 88 struct ExperimentalAgc { |
86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} | 89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} |
87 ExperimentalAgc(bool enabled) | 90 explicit ExperimentalAgc(bool enabled) |
88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} | 91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} |
89 ExperimentalAgc(bool enabled, int startup_min_volume) | 92 ExperimentalAgc(bool enabled, int startup_min_volume) |
90 : enabled(enabled), startup_min_volume(startup_min_volume) {} | 93 : enabled(enabled), startup_min_volume(startup_min_volume) {} |
91 bool enabled; | 94 bool enabled; |
92 int startup_min_volume; | 95 int startup_min_volume; |
93 }; | 96 }; |
94 | 97 |
95 // Use to enable experimental noise suppression. It can be set in the | 98 // Use to enable experimental noise suppression. It can be set in the |
96 // constructor or using AudioProcessing::SetExtraOptions(). | 99 // constructor or using AudioProcessing::SetExtraOptions(). |
97 struct ExperimentalNs { | 100 struct ExperimentalNs { |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
192 // | 195 // |
193 // // Repeat render and capture processing for the duration of the call... | 196 // // Repeat render and capture processing for the duration of the call... |
194 // // Start a new call... | 197 // // Start a new call... |
195 // apm->Initialize(); | 198 // apm->Initialize(); |
196 // | 199 // |
197 // // Close the application... | 200 // // Close the application... |
198 // delete apm; | 201 // delete apm; |
199 // | 202 // |
200 class AudioProcessing { | 203 class AudioProcessing { |
201 public: | 204 public: |
| 205 // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone. |
202 enum ChannelLayout { | 206 enum ChannelLayout { |
203 kMono, | 207 kMono, |
204 // Left, right. | 208 // Left, right. |
205 kStereo, | 209 kStereo, |
206 // Mono, keyboard mic. | 210 // Mono, keyboard mic. |
207 kMonoAndKeyboard, | 211 kMonoAndKeyboard, |
208 // Left, right, keyboard mic. | 212 // Left, right, keyboard mic. |
209 kStereoAndKeyboard | 213 kStereoAndKeyboard |
210 }; | 214 }; |
211 | 215 |
(...skipping 17 matching lines...) Expand all Loading... |
229 // | 233 // |
230 // It is also not necessary to call if the audio parameters (sample | 234 // It is also not necessary to call if the audio parameters (sample |
231 // rate and number of channels) have changed. Passing updated parameters | 235 // rate and number of channels) have changed. Passing updated parameters |
232 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. | 236 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. |
233 // If the parameters are known at init-time though, they may be provided. | 237 // If the parameters are known at init-time though, they may be provided. |
234 virtual int Initialize() = 0; | 238 virtual int Initialize() = 0; |
235 | 239 |
236 // The int16 interfaces require: | 240 // The int16 interfaces require: |
237 // - only |NativeRate|s be used | 241 // - only |NativeRate|s be used |
238 // - that the input, output and reverse rates must match | 242 // - that the input, output and reverse rates must match |
239 // - that |output_layout| matches |input_layout| | 243 // - that |processing_config.output_stream()| matches |
| 244 // |processing_config.input_stream()|. |
240 // | 245 // |
241 // The float interfaces accept arbitrary rates and support differing input | 246 // The float interfaces accept arbitrary rates and support differing input and |
242 // and output layouts, but the output may only remove channels, not add. | 247 // output layouts, but the output must have either one channel or the same |
| 248 // number of channels as the input. |
| 249 virtual int Initialize(const ProcessingConfig& processing_config) = 0; |
| 250 |
| 251 // Initialize with unpacked parameters. See Initialize() above for details. |
| 252 // |
| 253 // TODO(mgraczyk): Remove once clients are updated to use the new interface. |
243 virtual int Initialize(int input_sample_rate_hz, | 254 virtual int Initialize(int input_sample_rate_hz, |
244 int output_sample_rate_hz, | 255 int output_sample_rate_hz, |
245 int reverse_sample_rate_hz, | 256 int reverse_sample_rate_hz, |
246 ChannelLayout input_layout, | 257 ChannelLayout input_layout, |
247 ChannelLayout output_layout, | 258 ChannelLayout output_layout, |
248 ChannelLayout reverse_layout) = 0; | 259 ChannelLayout reverse_layout) = 0; |
249 | 260 |
250 // Pass down additional options which don't have explicit setters. This | 261 // Pass down additional options which don't have explicit setters. This |
251 // ensures the options are applied immediately. | 262 // ensures the options are applied immediately. |
252 virtual void SetExtraOptions(const Config& config) = 0; | 263 virtual void SetExtraOptions(const Config& config) = 0; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
285 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 296 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
286 // members of |frame| must be valid. If changed from the previous call to this | 297 // members of |frame| must be valid. If changed from the previous call to this |
287 // method, it will trigger an initialization. | 298 // method, it will trigger an initialization. |
288 virtual int ProcessStream(AudioFrame* frame) = 0; | 299 virtual int ProcessStream(AudioFrame* frame) = 0; |
289 | 300 |
290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 301 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
291 // of |src| points to a channel buffer, arranged according to | 302 // of |src| points to a channel buffer, arranged according to |
292 // |input_layout|. At output, the channels will be arranged according to | 303 // |input_layout|. At output, the channels will be arranged according to |
293 // |output_layout| at |output_sample_rate_hz| in |dest|. | 304 // |output_layout| at |output_sample_rate_hz| in |dest|. |
294 // | 305 // |
295 // The output layout may only remove channels, not add. |src| and |dest| | 306 // The output layout must have one channel or as many channels as the input. |
296 // may use the same memory, if desired. | 307 // |src| and |dest| may use the same memory, if desired. |
| 308 // |
| 309 // TODO(mgraczyk): Remove once clients are updated to use the new interface. |
297 virtual int ProcessStream(const float* const* src, | 310 virtual int ProcessStream(const float* const* src, |
298 int samples_per_channel, | 311 int samples_per_channel, |
299 int input_sample_rate_hz, | 312 int input_sample_rate_hz, |
300 ChannelLayout input_layout, | 313 ChannelLayout input_layout, |
301 int output_sample_rate_hz, | 314 int output_sample_rate_hz, |
302 ChannelLayout output_layout, | 315 ChannelLayout output_layout, |
303 float* const* dest) = 0; | 316 float* const* dest) = 0; |
304 | 317 |
| 318 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of |
| 319 // |src| points to a channel buffer, arranged according to |input_config|. At |
| 320 // output, the channels will be arranged according to |output_config| in |
| 321 // |dest|. |
| 322 // |
| 323 // The output must have one channel or as many channels as the input. |src| |
| 324 // and |dest| may use the same memory, if desired. |
| 325 virtual int ProcessStream(const float* const* src, |
| 326 const StreamConfig& input_config, |
| 327 const StreamConfig& output_config, |
| 328 float* const* dest) = 0; |
| 329 |
305 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame | 330 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame |
306 // will not be modified. On the client-side, this is the far-end (or to be | 331 // will not be modified. On the client-side, this is the far-end (or to be |
307 // rendered) audio. | 332 // rendered) audio. |
308 // | 333 // |
309 // It is only necessary to provide this if echo processing is enabled, as the | 334 // It is only necessary to provide this if echo processing is enabled, as the |
310 // reverse stream forms the echo reference signal. It is recommended, but not | 335 // reverse stream forms the echo reference signal. It is recommended, but not |
311 // necessary, to provide if gain control is enabled. On the server-side this | 336 // necessary, to provide if gain control is enabled. On the server-side this |
312 // typically will not be used. If you're not sure what to pass in here, | 337 // typically will not be used. If you're not sure what to pass in here, |
313 // chances are you don't need to use it. | 338 // chances are you don't need to use it. |
314 // | 339 // |
315 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 340 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
316 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 341 // members of |frame| must be valid. |sample_rate_hz_| must correspond to |
317 // |input_sample_rate_hz()| | 342 // |input_sample_rate_hz()| |
318 // | 343 // |
319 // TODO(ajm): add const to input; requires an implementation fix. | 344 // TODO(ajm): add const to input; requires an implementation fix. |
320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 345 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; |
321 | 346 |
322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
323 // of |data| points to a channel buffer, arranged according to |layout|. | 348 // of |data| points to a channel buffer, arranged according to |layout|. |
| 349 // |
| 350 // TODO(mgraczyk): Remove once clients are updated to use the new interface. |
324 virtual int AnalyzeReverseStream(const float* const* data, | 351 virtual int AnalyzeReverseStream(const float* const* data, |
325 int samples_per_channel, | 352 int samples_per_channel, |
326 int sample_rate_hz, | 353 int sample_rate_hz, |
327 ChannelLayout layout) = 0; | 354 ChannelLayout layout) = 0; |
328 | 355 |
| 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of |
| 357 // |data| points to a channel buffer, arranged according to |reverse_config|. |
| 358 virtual int AnalyzeReverseStream(const float* const* data, |
| 359 const StreamConfig& reverse_config) = 0; |
| 360 |
329 // This must be called if and only if echo processing is enabled. | 361 // This must be called if and only if echo processing is enabled. |
330 // | 362 // |
331 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 363 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end |
332 // frame and ProcessStream() receiving a near-end frame containing the | 364 // frame and ProcessStream() receiving a near-end frame containing the |
333 // corresponding echo. On the client-side this can be expressed as | 365 // corresponding echo. On the client-side this can be expressed as |
334 // delay = (t_render - t_analyze) + (t_process - t_capture) | 366 // delay = (t_render - t_analyze) + (t_process - t_capture) |
335 // where, | 367 // where, |
336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 368 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and |
337 // t_render is the time the first sample of the same frame is rendered by | 369 // t_render is the time the first sample of the same frame is rendered by |
338 // the audio hardware. | 370 // the audio hardware. |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
425 enum NativeRate { | 457 enum NativeRate { |
426 kSampleRate8kHz = 8000, | 458 kSampleRate8kHz = 8000, |
427 kSampleRate16kHz = 16000, | 459 kSampleRate16kHz = 16000, |
428 kSampleRate32kHz = 32000, | 460 kSampleRate32kHz = 32000, |
429 kSampleRate48kHz = 48000 | 461 kSampleRate48kHz = 48000 |
430 }; | 462 }; |
431 | 463 |
432 static const int kChunkSizeMs = 10; | 464 static const int kChunkSizeMs = 10; |
433 }; | 465 }; |
434 | 466 |
| 467 class StreamConfig { |
| 468 public: |
| 469 // sample_rate_hz: The sampling rate of the stream. |
| 470 // |
| 471 // num_channels: The number of audio channels in the stream, excluding the |
| 472 // keyboard channel if it is present. When passing a |
| 473 // StreamConfig with an array of arrays T*[N], |
| 474 // |
| 475 // N == {num_channels + 1 if has_keyboard |
| 476 // {num_channels if !has_keyboard |
| 477 // |
| 478 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard |
| 479 // is true, the last channel in any corresponding list of |
| 480 // channels is the keyboard channel. |
| 481 StreamConfig(int sample_rate_hz = 0, |
| 482 int num_channels = 0, |
| 483 bool has_keyboard = false) |
| 484 : sample_rate_hz_(sample_rate_hz), |
| 485 num_channels_(num_channels), |
| 486 has_keyboard_(has_keyboard), |
| 487 num_frames_(calculate_frames(sample_rate_hz)) {} |
| 488 |
| 489 void set_sample_rate_hz(int value) { |
| 490 sample_rate_hz_ = value; |
| 491 num_frames_ = calculate_frames(value); |
| 492 } |
| 493 void set_num_channels(int value) { num_channels_ = value; } |
| 494 void set_has_keyboard(bool value) { has_keyboard_ = value; } |
| 495 |
| 496 int sample_rate_hz() const { return sample_rate_hz_; } |
| 497 |
| 498 // The number of channels in the stream, not including the keyboard channel if |
| 499 // present. |
| 500 int num_channels() const { return num_channels_; } |
| 501 |
| 502 bool has_keyboard() const { return has_keyboard_; } |
| 503 int num_frames() const { return num_frames_; } |
| 504 |
| 505 bool operator==(const StreamConfig& other) const { |
| 506 return sample_rate_hz_ == other.sample_rate_hz_ && |
| 507 num_channels_ == other.num_channels_ && |
| 508 has_keyboard_ == other.has_keyboard_; |
| 509 } |
| 510 |
| 511 bool operator!=(const StreamConfig& other) const { return !(*this == other); } |
| 512 |
| 513 private: |
| 514 static int calculate_frames(int sample_rate_hz) { |
| 515 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; |
| 516 } |
| 517 |
| 518 int sample_rate_hz_; |
| 519 int num_channels_; |
| 520 bool has_keyboard_; |
| 521 int num_frames_; |
| 522 }; |
| 523 |
| 524 class ProcessingConfig { |
| 525 public: |
| 526 enum StreamName { |
| 527 kInputStream, |
| 528 kOutputStream, |
| 529 kReverseStream, |
| 530 kNumStreamNames, |
| 531 }; |
| 532 |
| 533 const StreamConfig& input_stream() const { |
| 534 return streams[StreamName::kInputStream]; |
| 535 } |
| 536 const StreamConfig& output_stream() const { |
| 537 return streams[StreamName::kOutputStream]; |
| 538 } |
| 539 const StreamConfig& reverse_stream() const { |
| 540 return streams[StreamName::kReverseStream]; |
| 541 } |
| 542 |
| 543 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } |
| 544 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } |
| 545 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } |
| 546 |
| 547 bool operator==(const ProcessingConfig& other) const { |
| 548 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { |
| 549 if (this->streams[i] != other.streams[i]) { |
| 550 return false; |
| 551 } |
| 552 } |
| 553 return true; |
| 554 } |
| 555 |
| 556 bool operator!=(const ProcessingConfig& other) const { |
| 557 return !(*this == other); |
| 558 } |
| 559 |
| 560 StreamConfig streams[StreamName::kNumStreamNames]; |
| 561 }; |
| 562 |
435 // The acoustic echo cancellation (AEC) component provides better performance | 563 // The acoustic echo cancellation (AEC) component provides better performance |
436 // than AECM but also requires more processing power and is dependent on delay | 564 // than AECM but also requires more processing power and is dependent on delay |
437 // stability and reporting accuracy. As such it is well-suited and recommended | 565 // stability and reporting accuracy. As such it is well-suited and recommended |
438 // for PC and IP phone applications. | 566 // for PC and IP phone applications. |
439 // | 567 // |
440 // Not recommended to be enabled on the server-side. | 568 // Not recommended to be enabled on the server-side. |
441 class EchoCancellation { | 569 class EchoCancellation { |
442 public: | 570 public: |
443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. | 571 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. |
444 // Enabling one will disable the other. | 572 // Enabling one will disable the other. |
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
787 // This does not impact the size of frames passed to |ProcessStream()|. | 915 // This does not impact the size of frames passed to |ProcessStream()|. |
788 virtual int set_frame_size_ms(int size) = 0; | 916 virtual int set_frame_size_ms(int size) = 0; |
789 virtual int frame_size_ms() const = 0; | 917 virtual int frame_size_ms() const = 0; |
790 | 918 |
791 protected: | 919 protected: |
792 virtual ~VoiceDetection() {} | 920 virtual ~VoiceDetection() {} |
793 }; | 921 }; |
794 } // namespace webrtc | 922 } // namespace webrtc |
795 | 923 |
796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 924 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ |
OLD | NEW |