| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 98 matching lines...) | |
| 109 Beamforming() | 109 Beamforming() |
| 110 : enabled(false), | 110 : enabled(false), |
| 111 array_geometry() {} | 111 array_geometry() {} |
| 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) | 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) |
| 113 : enabled(enabled), | 113 : enabled(enabled), |
| 114 array_geometry(array_geometry) {} | 114 array_geometry(array_geometry) {} |
| 115 const bool enabled; | 115 const bool enabled; |
| 116 const std::vector<Point> array_geometry; | 116 const std::vector<Point> array_geometry; |
| 117 }; | 117 }; |
| 118 | 118 |
| 119 // Use to enable intelligibility enhancer in audio processing. Must be provided | |
| 120 // through the constructor. It will have no impact if used with | |
| 121 // AudioProcessing::SetExtraOptions(). | |
| 122 // | |
| 123 // Note: If enabled and the reverse stream has more than one output channel, | |
| 124 // the reverse stream will become an upmixed mono signal. | |
| 125 struct Intelligibility { | |
| 126 Intelligibility() : enabled(false) {} | |
| 127 explicit Intelligibility(bool enabled) : enabled(enabled) {} | |
| 128 bool enabled; | |
| 129 }; | |
| 130 | |
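For orientation, here is a minimal sketch of how these constructor-time options might be supplied. It assumes the webrtc::Config Set<T>() mechanism, the AudioProcessing::Create(const Config&) factory, and a Point(x, y, z) constructor, none of which is shown in this diff.

```cpp
// Sketch only: enabling Beamforming and Intelligibility at construction time.
// webrtc::Config, Config::Set<T>(), AudioProcessing::Create(const Config&) and
// Point(x, y, z) are assumed APIs; they are not part of this diff.
#include <vector>

#include "webrtc/modules/audio_processing/include/audio_processing.h"

webrtc::AudioProcessing* CreateApmWithRenderSideEnhancements() {
  webrtc::Config config;

  // Two-mic linear array, 5 cm spacing, coordinates in meters (illustrative).
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(-0.025f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.025f, 0.f, 0.f));
  config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));

  // Per the comment above, Intelligibility must be set here;
  // SetExtraOptions() would have no effect.
  config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));

  return webrtc::AudioProcessing::Create(config);  // caller takes ownership
}
```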
| 119 static const int kAudioProcMaxNativeSampleRateHz = 32000; | 131 static const int kAudioProcMaxNativeSampleRateHz = 32000; |
| 120 | 132 |
| 121 // The Audio Processing Module (APM) provides a collection of voice processing | 133 // The Audio Processing Module (APM) provides a collection of voice processing |
| 122 // components designed for real-time communications software. | 134 // components designed for real-time communications software. |
| 123 // | 135 // |
| 124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the | 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the |
| 125 // primary stream, on which all processing is applied, are passed to | 137 // primary stream, on which all processing is applied, are passed to |
| 126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for | 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for |
| 127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the | 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the |
| 128 // client-side, this will typically be the near-end (capture) and far-end | 140 // client-side, this will typically be the near-end (capture) and far-end |
| (...skipping 197 matching lines...) | |
| 326 // reverse stream forms the echo reference signal. It is recommended, but not | 338 // reverse stream forms the echo reference signal. It is recommended, but not |
| 327 // necessary, to provide if gain control is enabled. On the server-side this | 339 // necessary, to provide if gain control is enabled. On the server-side this |
| 328 // typically will not be used. If you're not sure what to pass in here, | 340 // typically will not be used. If you're not sure what to pass in here, |
| 329 // chances are you don't need to use it. | 341 // chances are you don't need to use it. |
| 330 // | 342 // |
| 331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
| 332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to |
| 333 // |input_sample_rate_hz()| | 345 // |input_sample_rate_hz()| |
| 334 // | 346 // |
| 335 // TODO(ajm): add const to input; requires an implementation fix. | 347 // TODO(ajm): add const to input; requires an implementation fix. |
| 348 // DEPRECATED: Use |ProcessReverseStream| instead. | |
| 349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. | |
| 336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; |
| 337 | 351 |
| 352 // Same as |AnalyzeReverseStream|, but may modify |data| if intelligibility | |
Andrew MacDonald (2015/07/30 18:48:53): modify |frame|
ekm (2015/07/30 22:38:44): Done.
| 353 // is enabled. | |
| 354 virtual int ProcessReverseStream(AudioFrame* frame) = 0; | |
| 355 | |
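A small sketch of the render-side AudioFrame path, contrasting the deprecated call with its replacement; the AudioFrame type and the kNoError error code are assumed to be available through this header, as they are not shown in this excerpt.

```cpp
// Sketch: feeding a 10 ms far-end (render) frame to an initialized APM.
// The deprecated AnalyzeReverseStream() call is kept as a comment for contrast.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

int FeedRenderFrame(webrtc::AudioProcessing* apm, webrtc::AudioFrame* frame) {
  // Old, analysis-only path (deprecated above):
  //   return apm->AnalyzeReverseStream(frame);

  // New path: same analysis, but |frame| may be modified in place when the
  // intelligibility enhancer is enabled.
  int err = apm->ProcessReverseStream(frame);
  if (err != webrtc::AudioProcessing::kNoError) {
    // Handle or log the error; codes are defined elsewhere in this header.
  }
  return err;
}
```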
| 338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
| 339 // of |data| points to a channel buffer, arranged according to |layout|. | 357 // of |data| points to a channel buffer, arranged according to |layout|. |
| 340 // | |
| 341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface. |
| 342 virtual int AnalyzeReverseStream(const float* const* data, | 359 virtual int AnalyzeReverseStream(const float* const* data, |
| 343 int samples_per_channel, | 360 int samples_per_channel, |
| 344 int sample_rate_hz, | 361 int rev_sample_rate_hz, |
| 345 ChannelLayout layout) = 0; | 362 ChannelLayout layout) = 0; |
| 346 | 363 |
| 347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | 364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of |
| 348 // |data| points to a channel buffer, arranged according to |reverse_config|. | 365 // |data| points to a channel buffer, arranged according to |reverse_config|. |
| 349 virtual int AnalyzeReverseStream(const float* const* data, | 366 virtual int ProcessReverseStream(const float* const* src, |
| 350 const StreamConfig& reverse_config) = 0; | 367 const StreamConfig& reverse_input_config, |
| 368 const StreamConfig& reverse_output_config, | |
Andrew MacDonald (2015/07/30 18:48:53): I'm not sure we want to do this. The way you have…
aluebs-webrtc (2015/07/30 18:56:34): You bring a good point here. I think it makes sens…
ekm (2015/07/30 21:23:50): Are we talking about converting reverse to capture…
aluebs-webrtc (2015/07/30 23:09:51): I would prefer to avoid adding yet another interfa…
ekm (2015/07/30 23:20:17): Ok. Why are the two configs in the ProcessStream i…
aluebs-webrtc (2015/07/30 23:23:38): So that the APM knows what the user expects as out…
| 369 float* const* dest) = 0; | |
| 351 | 370 |
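A hedged sketch of the float-based render call using the new signature, assuming 10 ms chunks (sample rate / 100 frames per channel, matching the StreamConfig convention further down) and a stereo 48 kHz reverse stream chosen purely for illustration.

```cpp
// Sketch: render-side processing with deinterleaved float buffers.
// Buffer sizing (10 ms chunks) and the stereo/48 kHz choice are assumptions
// made for illustration only.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

int FeedRenderFloat(webrtc::AudioProcessing* apm) {
  const int kSampleRateHz = 48000;
  const int kNumFrames = kSampleRateHz / 100;  // one 10 ms chunk per channel

  float in_left[kNumFrames] = {0.f};   // samples in the range [-1, 1]
  float in_right[kNumFrames] = {0.f};
  float out_left[kNumFrames] = {0.f};
  float out_right[kNumFrames] = {0.f};
  const float* const src[] = {in_left, in_right};
  float* const dest[] = {out_left, out_right};

  webrtc::StreamConfig reverse_input_config(kSampleRateHz, 2);
  webrtc::StreamConfig reverse_output_config(kSampleRateHz, 2);

  return apm->ProcessReverseStream(src, reverse_input_config,
                                   reverse_output_config, dest);
}
```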
| 352 // This must be called if and only if echo processing is enabled. | 371 // This must be called if and only if echo processing is enabled. |
| 353 // | 372 // |
| 354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 373 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end |
| 355 // frame and ProcessStream() receiving a near-end frame containing the | 374 // frame and ProcessStream() receiving a near-end frame containing the |
| 356 // corresponding echo. On the client-side this can be expressed as | 375 // corresponding echo. On the client-side this can be expressed as |
| 357 // delay = (t_render - t_analyze) + (t_process - t_capture) | 376 // delay = (t_render - t_analyze) + (t_process - t_capture) |
| 358 // where, | 377 // where, |
| 359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and |
| 360 // t_render is the time the first sample of the same frame is rendered by | 379 // t_render is the time the first sample of the same frame is rendered by |
| (...skipping 101 matching lines...) | |
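The remainder of the stream-delay comment, and the setter it documents, falls in the elided lines above. As a worked illustration of the formula only, a sketch follows; set_stream_delay_ms() is assumed to be the corresponding setter, since its declaration is not visible here.

```cpp
// Sketch: applying delay = (t_render - t_analyze) + (t_process - t_capture).
// All timestamps are in milliseconds; set_stream_delay_ms() is assumed, since
// its declaration is not visible in this excerpt.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

void ReportStreamDelay(webrtc::AudioProcessing* apm,
                       int t_analyze_ms, int t_render_ms,
                       int t_capture_ms, int t_process_ms) {
  const int delay_ms =
      (t_render_ms - t_analyze_ms) + (t_process_ms - t_capture_ms);
  apm->set_stream_delay_ms(delay_ms);
}
```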
| 462 // num_channels: The number of audio channels in the stream, excluding the | 481 // num_channels: The number of audio channels in the stream, excluding the |
| 463 // keyboard channel if it is present. When passing a | 482 // keyboard channel if it is present. When passing a |
| 464 // StreamConfig with an array of arrays T*[N], | 483 // StreamConfig with an array of arrays T*[N], |
| 465 // | 484 // |
| 466 // N == {num_channels + 1 if has_keyboard | 485 // N == {num_channels + 1 if has_keyboard |
| 467 // {num_channels if !has_keyboard | 486 // {num_channels if !has_keyboard |
| 468 // | 487 // |
| 469 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard | 488 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard |
| 470 // is true, the last channel in any corresponding list of | 489 // is true, the last channel in any corresponding list of |
| 471 // channels is the keyboard channel. | 490 // channels is the keyboard channel. |
| 472 StreamConfig(int sample_rate_hz = 0, | 491 StreamConfig(int sample_rate_hz = 16000, |
| 473 int num_channels = 0, | 492 int num_channels = 1, |
aluebs-webrtc (2015/07/30 15:28:07): I think this zero-initialization here is on purpos…
Andrew MacDonald (2015/07/30 18:48:53): Agreed.
ekm (2015/07/30 22:38:44): That makes sense. I thought it'd be a nice way to…
| 474 bool has_keyboard = false) | 493 bool has_keyboard = false) |
| 475 : sample_rate_hz_(sample_rate_hz), | 494 : sample_rate_hz_(sample_rate_hz), |
| 476 num_channels_(num_channels), | 495 num_channels_(num_channels), |
| 477 has_keyboard_(has_keyboard), | 496 has_keyboard_(has_keyboard), |
| 478 num_frames_(calculate_frames(sample_rate_hz)) {} | 497 num_frames_(calculate_frames(sample_rate_hz)) {} |
| 479 | 498 |
| 480 void set_sample_rate_hz(int value) { | 499 void set_sample_rate_hz(int value) { |
| 481 sample_rate_hz_ = value; | 500 sample_rate_hz_ = value; |
| 482 num_frames_ = calculate_frames(value); | 501 num_frames_ = calculate_frames(value); |
| 483 } | 502 } |
| (...skipping 26 matching lines...) | |
| 510 int num_channels_; | 529 int num_channels_; |
| 511 bool has_keyboard_; | 530 bool has_keyboard_; |
| 512 int num_frames_; | 531 int num_frames_; |
| 513 }; | 532 }; |
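A brief sketch of constructing a StreamConfig for a capture stream with a keyboard channel; the expectation that num_frames() equals one 10 ms chunk (sample_rate_hz / 100) is an assumption about calculate_frames(), which is not shown here.

```cpp
// Sketch: a 48 kHz stereo capture stream that also carries a keyboard channel.
// When passing the matching T*[N] array to APM, N == num_channels + 1 and the
// keyboard channel is last, per the comment above.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

webrtc::StreamConfig MakeCaptureConfig() {
  const int kSampleRateHz = 48000;
  const int kNumChannels = 2;   // excludes the keyboard channel
  const bool kHasKeyboard = true;

  webrtc::StreamConfig config(kSampleRateHz, kNumChannels, kHasKeyboard);
  // num_frames() is expected to be 480 here (one 10 ms chunk at 48 kHz); this
  // is an assumption about calculate_frames().
  return config;
}
```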
| 514 | 533 |
| 515 class ProcessingConfig { | 534 class ProcessingConfig { |
| 516 public: | 535 public: |
| 517 enum StreamName { | 536 enum StreamName { |
| 518 kInputStream, | 537 kInputStream, |
| 519 kOutputStream, | 538 kOutputStream, |
| 520 kReverseStream, | 539 kReverseInputStream, |
| 540 kReverseOutputStream, | |
| 521 kNumStreamNames, | 541 kNumStreamNames, |
| 522 }; | 542 }; |
| 523 | 543 |
| 524 const StreamConfig& input_stream() const { | 544 const StreamConfig& input_stream() const { |
| 525 return streams[StreamName::kInputStream]; | 545 return streams[StreamName::kInputStream]; |
| 526 } | 546 } |
| 527 const StreamConfig& output_stream() const { | 547 const StreamConfig& output_stream() const { |
| 528 return streams[StreamName::kOutputStream]; | 548 return streams[StreamName::kOutputStream]; |
| 529 } | 549 } |
| 530 const StreamConfig& reverse_stream() const { | 550 const StreamConfig& reverse_input_stream() const { |
| 531 return streams[StreamName::kReverseStream]; | 551 return streams[StreamName::kReverseInputStream]; |
| 552 } | |
| 553 const StreamConfig& reverse_output_stream() const { | |
| 554 return streams[StreamName::kReverseOutputStream]; | |
| 532 } | 555 } |
| 533 | 556 |
| 534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } | 557 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } |
| 535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } | 558 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } |
| 536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } | 559 StreamConfig& reverse_input_stream() { |
| 560 return streams[StreamName::kReverseInputStream]; | |
| 561 } | |
| 562 StreamConfig& reverse_output_stream() { | |
| 563 return streams[StreamName::kReverseOutputStream]; | |
| 564 } | |
| 537 | 565 |
| 538 bool operator==(const ProcessingConfig& other) const { | 566 bool operator==(const ProcessingConfig& other) const { |
| 539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { | 567 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { |
| 540 if (this->streams[i] != other.streams[i]) { | 568 if (this->streams[i] != other.streams[i]) { |
| 541 return false; | 569 return false; |
| 542 } | 570 } |
| 543 } | 571 } |
| 544 return true; | 572 return true; |
| 545 } | 573 } |
| 546 | 574 |
| (...skipping 359 matching lines...) | |
| 906 // This does not impact the size of frames passed to |ProcessStream()|. | 934 // This does not impact the size of frames passed to |ProcessStream()|. |
| 907 virtual int set_frame_size_ms(int size) = 0; | 935 virtual int set_frame_size_ms(int size) = 0; |
| 908 virtual int frame_size_ms() const = 0; | 936 virtual int frame_size_ms() const = 0; |
| 909 | 937 |
| 910 protected: | 938 protected: |
| 911 virtual ~VoiceDetection() {} | 939 virtual ~VoiceDetection() {} |
| 912 }; | 940 }; |
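Finally, a small sketch of adjusting the VAD decision interval; the voice_detection() component accessor and Enable() follow the usual APM component pattern and are assumed here, since neither appears in this excerpt.

```cpp
// Sketch: request a voice-activity decision every 10 ms. This does not change
// the size of the frames passed to ProcessStream(), per the comment above.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

void EnableVad(webrtc::AudioProcessing* apm) {
  apm->voice_detection()->Enable(true);           // accessor/Enable() assumed
  apm->voice_detection()->set_frame_size_ms(10);  // declared in this class
}
```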
| 913 } // namespace webrtc | 941 } // namespace webrtc |
| 914 | 942 |
| 915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 943 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ |