Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 22 | 22 |
| 23 struct AecCore; | 23 struct AecCore; |
| 24 | 24 |
| 25 namespace webrtc { | 25 namespace webrtc { |
| 26 | 26 |
| 27 class AudioFrame; | 27 class AudioFrame; |
| 28 | 28 |
| 29 template<typename T> | 29 template<typename T> |
| 30 class Beamformer; | 30 class Beamformer; |
| 31 | 31 |
| 32 class StreamConfig; | |
| 33 class ProcessingConfig; | |
| 34 | |
| 32 class EchoCancellation; | 35 class EchoCancellation; |
| 33 class EchoControlMobile; | 36 class EchoControlMobile; |
| 34 class GainControl; | 37 class GainControl; |
| 35 class HighPassFilter; | 38 class HighPassFilter; |
| 36 class LevelEstimator; | 39 class LevelEstimator; |
| 37 class NoiseSuppression; | 40 class NoiseSuppression; |
| 38 class VoiceDetection; | 41 class VoiceDetection; |
| 39 | 42 |
| 40 // Use to enable the extended filter mode in the AEC, along with robustness | 43 // Use to enable the extended filter mode in the AEC, along with robustness |
| 41 // measures around the reported system delays. It comes with a significant | 44 // measures around the reported system delays. It comes with a significant |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 77 // [12, 255]. Here, 255 maps to 100%. | 80 // [12, 255]. Here, 255 maps to 100%. |
| 78 // | 81 // |
| 79 // Must be provided through AudioProcessing::Create(Config&). | 82 // Must be provided through AudioProcessing::Create(Config&). |
| 80 #if defined(WEBRTC_CHROMIUM_BUILD) | 83 #if defined(WEBRTC_CHROMIUM_BUILD) |
| 81 static const int kAgcStartupMinVolume = 85; | 84 static const int kAgcStartupMinVolume = 85; |
| 82 #else | 85 #else |
| 83 static const int kAgcStartupMinVolume = 0; | 86 static const int kAgcStartupMinVolume = 0; |
| 84 #endif // defined(WEBRTC_CHROMIUM_BUILD) | 87 #endif // defined(WEBRTC_CHROMIUM_BUILD) |
| 85 struct ExperimentalAgc { | 88 struct ExperimentalAgc { |
| 86 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} | 89 ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {} |
| 87 ExperimentalAgc(bool enabled) | 90 explicit ExperimentalAgc(bool enabled) |
| 88 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} | 91 : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} |
| 89 ExperimentalAgc(bool enabled, int startup_min_volume) | 92 ExperimentalAgc(bool enabled, int startup_min_volume) |
| 90 : enabled(enabled), startup_min_volume(startup_min_volume) {} | 93 : enabled(enabled), startup_min_volume(startup_min_volume) {} |
| 91 bool enabled; | 94 bool enabled; |
| 92 int startup_min_volume; | 95 int startup_min_volume; |
| 93 }; | 96 }; |
| 94 | 97 |
| 95 // Use to enable experimental noise suppression. It can be set in the | 98 // Use to enable experimental noise suppression. It can be set in the |
| 96 // constructor or using AudioProcessing::SetExtraOptions(). | 99 // constructor or using AudioProcessing::SetExtraOptions(). |
| 97 struct ExperimentalNs { | 100 struct ExperimentalNs { |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 229 // | 232 // |
| 230 // It is also not necessary to call if the audio parameters (sample | 233 // It is also not necessary to call if the audio parameters (sample |
| 231 // rate and number of channels) have changed. Passing updated parameters | 234 // rate and number of channels) have changed. Passing updated parameters |
| 232 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. | 235 // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. |
| 233 // If the parameters are known at init-time though, they may be provided. | 236 // If the parameters are known at init-time though, they may be provided. |
| 234 virtual int Initialize() = 0; | 237 virtual int Initialize() = 0; |
| 235 | 238 |
| 236 // The int16 interfaces require: | 239 // The int16 interfaces require: |
| 237 // - only |NativeRate|s be used | 240 // - only |NativeRate|s be used |
| 238 // - that the input, output and reverse rates must match | 241 // - that the input, output and reverse rates must match |
| 239 // - that |output_layout| matches |input_layout| | 242 // - that |processing_config.output_stream()| matches |
| 243 // |processing_config.input_stream()|. | |
| 240 // | 244 // |
| 241 // The float interfaces accept arbitrary rates and support differing input | 245 // The float interfaces accept arbitrary rates and support differing input and |
| 242 // and output layouts, but the output may only remove channels, not add. | 246 // output layouts, but the output must have either one channel or the same |
| 247 // number of channels as the input. | |
| 248 virtual int Initialize(const ProcessingConfig& processing_config) = 0; | |
| 249 | |
| 250 // Initialize with unpacked parameters. See Initialize() above for details. | |
| 243 virtual int Initialize(int input_sample_rate_hz, | 251 virtual int Initialize(int input_sample_rate_hz, |
| 244 int output_sample_rate_hz, | 252 int output_sample_rate_hz, |
| 245 int reverse_sample_rate_hz, | 253 int reverse_sample_rate_hz, |
| 246 ChannelLayout input_layout, | 254 ChannelLayout input_layout, |
| 247 ChannelLayout output_layout, | 255 ChannelLayout output_layout, |
| 248 ChannelLayout reverse_layout) = 0; | 256 ChannelLayout reverse_layout) = 0; |
| 249 | 257 |
| 250 // Pass down additional options which don't have explicit setters. This | 258 // Pass down additional options which don't have explicit setters. This |
| 251 // ensures the options are applied immediately. | 259 // ensures the options are applied immediately. |
| 252 virtual void SetExtraOptions(const Config& config) = 0; | 260 virtual void SetExtraOptions(const Config& config) = 0; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 285 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 293 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
| 286 // members of |frame| must be valid. If changed from the previous call to this | 294 // members of |frame| must be valid. If changed from the previous call to this |
| 287 // method, it will trigger an initialization. | 295 // method, it will trigger an initialization. |
| 288 virtual int ProcessStream(AudioFrame* frame) = 0; | 296 virtual int ProcessStream(AudioFrame* frame) = 0; |
| 289 | 297 |
| 290 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 298 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
| 291 // of |src| points to a channel buffer, arranged according to | 299 // of |src| points to a channel buffer, arranged according to |
| 292 // |input_layout|. At output, the channels will be arranged according to | 300 // |input_layout|. At output, the channels will be arranged according to |
| 293 // |output_layout| at |output_sample_rate_hz| in |dest|. | 301 // |output_layout| at |output_sample_rate_hz| in |dest|. |
| 294 // | 302 // |
| 295 // The output layout may only remove channels, not add. |src| and |dest| | 303 // The output layout must have one channel or as many channels as the input. |
| 296 // may use the same memory, if desired. | 304 // |src| and |dest| may use the same memory, if desired. |
| 297 virtual int ProcessStream(const float* const* src, | 305 virtual int ProcessStream(const float* const* src, |
|
ajm
2015/07/15 05:21:20
Can you add a TODO here and on the deprecated Anal
mgraczyk
2015/07/15 20:03:20
Done.
| |
| 298 int samples_per_channel, | 306 int samples_per_channel, |
| 299 int input_sample_rate_hz, | 307 int input_sample_rate_hz, |
| 300 ChannelLayout input_layout, | 308 ChannelLayout input_layout, |
| 301 int output_sample_rate_hz, | 309 int output_sample_rate_hz, |
| 302 ChannelLayout output_layout, | 310 ChannelLayout output_layout, |
| 303 float* const* dest) = 0; | 311 float* const* dest) = 0; |
| 304 | 312 |
| 313 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | |
| 314 // |src| points to a channel buffer, arranged according to | |
| 315 // |processing_config.input_stream()|. At output, the channels will be | |
| 316 // arranged according to |processing_config.output_stream()| in |dest|. | |
| 317 // | |
| 318 // The output must have one channel or as many channels as the input. |src| | |
| 319 // and |dest| may use the same memory, if desired. | |
| 320 virtual int ProcessStream(const float* const* src, | |
| 321 const ProcessingConfig& processing_config, | |
| 322 float* const* dest) = 0; | |
| 323 | |
| 305 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame | 324 // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame |
| 306 // will not be modified. On the client-side, this is the far-end (or to be | 325 // will not be modified. On the client-side, this is the far-end (or to be |
| 307 // rendered) audio. | 326 // rendered) audio. |
| 308 // | 327 // |
| 309 // It is only necessary to provide this if echo processing is enabled, as the | 328 // It is only necessary to provide this if echo processing is enabled, as the |
| 310 // reverse stream forms the echo reference signal. It is recommended, but not | 329 // reverse stream forms the echo reference signal. It is recommended, but not |
| 311 // necessary, to provide if gain control is enabled. On the server-side this | 330 // necessary, to provide if gain control is enabled. On the server-side this |
| 312 // typically will not be used. If you're not sure what to pass in here, | 331 // typically will not be used. If you're not sure what to pass in here, |
| 313 // chances are you don't need to use it. | 332 // chances are you don't need to use it. |
| 314 // | 333 // |
| 315 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 334 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
| 316 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 335 // members of |frame| must be valid. |sample_rate_hz_| must correspond to |
| 317 // |input_sample_rate_hz()|. | 336 // |input_sample_rate_hz()|. |
| 318 // | 337 // |
| 319 // TODO(ajm): add const to input; requires an implementation fix. | 338 // TODO(ajm): add const to input; requires an implementation fix. |
| 320 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 339 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; |
| 321 | 340 |
| 322 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 341 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
| 323 // of |data| points to a channel buffer, arranged according to |layout|. | 342 // of |data| points to a channel buffer, arranged according to |layout|. |
| 324 virtual int AnalyzeReverseStream(const float* const* data, | 343 virtual int AnalyzeReverseStream(const float* const* data, |
| 325 int samples_per_channel, | 344 int samples_per_channel, |
| 326 int sample_rate_hz, | 345 int sample_rate_hz, |
| 327 ChannelLayout layout) = 0; | 346 ChannelLayout layout) = 0; |
| 328 | 347 |
| 348 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | |
| 349 // |data| points to a channel buffer, arranged according to |reverse_config|. | |
| 350 virtual int AnalyzeReverseStream(const float* const* data, | |
| 351 const StreamConfig& reverse_config) = 0; | |
| 352 | |
| 329 // This must be called if and only if echo processing is enabled. | 353 // This must be called if and only if echo processing is enabled. |
| 330 // | 354 // |
| 331 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 355 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end |
| 332 // frame and ProcessStream() receiving a near-end frame containing the | 356 // frame and ProcessStream() receiving a near-end frame containing the |
| 333 // corresponding echo. On the client-side this can be expressed as | 357 // corresponding echo. On the client-side this can be expressed as |
| 334 // delay = (t_render - t_analyze) + (t_process - t_capture) | 358 // delay = (t_render - t_analyze) + (t_process - t_capture) |
| 335 // where, | 359 // where, |
| 336 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 360 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and |
| 337 // t_render is the time the first sample of the same frame is rendered by | 361 // t_render is the time the first sample of the same frame is rendered by |
| 338 // the audio hardware. | 362 // the audio hardware. |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 425 enum NativeRate { | 449 enum NativeRate { |
| 426 kSampleRate8kHz = 8000, | 450 kSampleRate8kHz = 8000, |
| 427 kSampleRate16kHz = 16000, | 451 kSampleRate16kHz = 16000, |
| 428 kSampleRate32kHz = 32000, | 452 kSampleRate32kHz = 32000, |
| 429 kSampleRate48kHz = 48000 | 453 kSampleRate48kHz = 48000 |
| 430 }; | 454 }; |
| 431 | 455 |
| 432 static const int kChunkSizeMs = 10; | 456 static const int kChunkSizeMs = 10; |
| 433 }; | 457 }; |
| 434 | 458 |
| 459 class StreamConfig { | |
| 460 public: | |
| 461 StreamConfig(int sample_rate_hz = 0, | |
| 462 int num_channels = 0, | |
| 463 bool has_keyboard = false) | |
| 464 : sample_rate_hz_(sample_rate_hz), | |
| 465 num_channels_(num_channels), | |
| 466 has_keyboard_(has_keyboard), | |
| 467 samples_per_channel_(calculate_samples_per_channel(sample_rate_hz)) {} | |
|
ajm
2015/07/15 05:21:20
Alex, Michael: I know it breaks convention in this
aluebs-webrtc
2015/07/15 16:42:17
It is unfortunate that we used "frames" to refer t
mgraczyk
2015/07/15 20:03:20
Done, although I think there are plenty of better
| |
| 468 | |
| 469 void set_sample_rate_hz(int value) { | |
| 470 sample_rate_hz_ = value; | |
| 471 samples_per_channel_ = calculate_samples_per_channel(value); | |
| 472 } | |
| 473 void set_num_channels(int value) { num_channels_ = value; } | |
| 474 void set_has_keyboard(bool value) { has_keyboard_ = value; } | |
| 475 | |
| 476 int sample_rate_hz() const { return sample_rate_hz_; } | |
| 477 int num_channels() const { return num_channels_; } | |
| 478 bool has_keyboard() const { return has_keyboard_; } | |
| 479 int samples_per_channel() const { return samples_per_channel_; } | |
| 480 | |
| 481 bool operator==(const StreamConfig& other) const { | |
| 482 return sample_rate_hz_ == other.sample_rate_hz_ && | |
| 483 num_channels_ == other.num_channels_ && | |
| 484 has_keyboard_ == other.has_keyboard_; | |
| 485 } | |
| 486 | |
| 487 bool operator!=(const StreamConfig& other) const { return !(*this == other); } | |
| 488 | |
| 489 private: | |
| 490 static int calculate_samples_per_channel(int sample_rate_hz) { | |
| 491 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; | |
| 492 } | |
| 493 | |
| 494 int sample_rate_hz_; | |
| 495 int num_channels_; | |
| 496 bool has_keyboard_; | |
| 497 int samples_per_channel_; | |
| 498 }; | |
| 499 | |
| 500 class ProcessingConfig { | |
| 501 public: | |
| 502 enum StreamName { | |
| 503 kInputStream, | |
| 504 kOutputStream, | |
| 505 kReverseStream, | |
| 506 kNumStreamNames, | |
| 507 }; | |
| 508 | |
| 509 const StreamConfig& input_stream() const { | |
| 510 return streams[StreamName::kInputStream]; | |
| 511 } | |
| 512 const StreamConfig& output_stream() const { | |
| 513 return streams[StreamName::kOutputStream]; | |
| 514 } | |
| 515 const StreamConfig& reverse_stream() const { | |
| 516 return streams[StreamName::kReverseStream]; | |
| 517 } | |
| 518 | |
| 519 StreamConfig& input_stream() { | |
| 520 return streams[StreamName::kInputStream]; | |
| 521 } | |
| 522 StreamConfig& output_stream() { | |
| 523 return streams[StreamName::kOutputStream]; | |
| 524 } | |
| 525 StreamConfig& reverse_stream() { | |
| 526 return streams[StreamName::kReverseStream]; | |
| 527 } | |
| 528 | |
| 529 bool operator==(const ProcessingConfig& other) const { | |
| 530 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { | |
| 531 if (this->streams[i] != other.streams[i]) { | |
| 532 return false; | |
| 533 } | |
| 534 } | |
| 535 return true; | |
| 536 } | |
| 537 | |
| 538 bool operator!=(const ProcessingConfig& other) const { | |
| 539 return !(*this == other); | |
| 540 } | |
| 541 | |
| 542 StreamConfig streams[StreamName::kNumStreamNames]; | |
| 543 }; | |
| 544 | |
| 435 // The acoustic echo cancellation (AEC) component provides better performance | 545 // The acoustic echo cancellation (AEC) component provides better performance |
| 436 // than AECM but also requires more processing power and is dependent on delay | 546 // than AECM but also requires more processing power and is dependent on delay |
| 437 // stability and reporting accuracy. As such it is well-suited and recommended | 547 // stability and reporting accuracy. As such it is well-suited and recommended |
| 438 // for PC and IP phone applications. | 548 // for PC and IP phone applications. |
| 439 // | 549 // |
| 440 // Not recommended to be enabled on the server-side. | 550 // Not recommended to be enabled on the server-side. |
| 441 class EchoCancellation { | 551 class EchoCancellation { |
| 442 public: | 552 public: |
| 443 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. | 553 // EchoCancellation and EchoControlMobile may not be enabled simultaneously. |
| 444 // Enabling one will disable the other. | 554 // Enabling one will disable the other. |
| (...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 787 // This does not impact the size of frames passed to |ProcessStream()|. | 897 // This does not impact the size of frames passed to |ProcessStream()|. |
| 788 virtual int set_frame_size_ms(int size) = 0; | 898 virtual int set_frame_size_ms(int size) = 0; |
| 789 virtual int frame_size_ms() const = 0; | 899 virtual int frame_size_ms() const = 0; |
| 790 | 900 |
| 791 protected: | 901 protected: |
| 792 virtual ~VoiceDetection() {} | 902 virtual ~VoiceDetection() {} |
| 793 }; | 903 }; |
| 794 } // namespace webrtc | 904 } // namespace webrtc |
| 795 | 905 |
| 796 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 906 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ |
| OLD | NEW |