Chromium Code Reviews
Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Added support for distinct reverse input/output streams. Created 5 years, 4 months ago
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 98 matching lines...)
109 Beamforming() 109 Beamforming()
110 : enabled(false), 110 : enabled(false),
111 array_geometry() {} 111 array_geometry() {}
112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry)
113 : enabled(enabled), 113 : enabled(enabled),
114 array_geometry(array_geometry) {} 114 array_geometry(array_geometry) {}
115 const bool enabled; 115 const bool enabled;
116 const std::vector<Point> array_geometry; 116 const std::vector<Point> array_geometry;
117 }; 117 };
118 118
119 // Use to enable the intelligibility enhancer in audio processing. Must be provided
120 // through the constructor. It will have no impact if used with
121 // AudioProcessing::SetExtraOptions().
122 //
123 // Note: If enabled and the reverse stream has more than one output channel,
124 // the reverse stream will become an upmixed mono signal.
125 struct Intelligibility {
126 Intelligibility() : enabled(false) {}
127 explicit Intelligibility(bool enabled) : enabled(enabled) {}
128 bool enabled;
129 };
130
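A minimal sketch of enabling the enhancer as the comment above requires, i.e. through the constructor-time config rather than SetExtraOptions(). This assumes the usual webrtc::Config (common.h) Set<> pattern and the raw-pointer Create(config) factory; the helper name is illustrative, not part of this change. Later sketches in this review assume the same includes.

    #include "webrtc/common.h"
    #include "webrtc/modules/audio_processing/include/audio_processing.h"

    webrtc::AudioProcessing* CreateApmWithIntelligibility() {
      webrtc::Config config;
      // Must be set at construction time; SetExtraOptions() ignores it.
      config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));
      return webrtc::AudioProcessing::Create(config);  // Caller owns the result.
    }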
119 static const int kAudioProcMaxNativeSampleRateHz = 32000; 131 static const int kAudioProcMaxNativeSampleRateHz = 32000;
120 132
121 // The Audio Processing Module (APM) provides a collection of voice processing 133 // The Audio Processing Module (APM) provides a collection of voice processing
122 // components designed for real-time communications software. 134 // components designed for real-time communications software.
123 // 135 //
124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the
125 // primary stream, on which all processing is applied, are passed to 137 // primary stream, on which all processing is applied, are passed to
126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for
127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the
128 // client-side, this will typically be the near-end (capture) and far-end 140 // client-side, this will typically be the near-end (capture) and far-end
(...skipping 197 matching lines...)
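To make the two-stream, frame-by-frame model described above concrete, here is a hedged sketch of one client-side 10 ms tick; the frame sources, the fixed 50 ms delay, and the omitted error handling are all illustrative.

    // Sketch: per 10 ms tick, hand the far-end (render) frame and then the
    // near-end (capture) frame to APM. Processing is applied to the capture
    // frame in place; return codes are ignored for brevity.
    void ProcessOneTick(webrtc::AudioProcessing* apm,
                        webrtc::AudioFrame* render_frame,
                        webrtc::AudioFrame* capture_frame) {
      apm->ProcessReverseStream(render_frame);  // Far-end / playout side.
      apm->set_stream_delay_ms(50);             // Required when echo processing is on.
      apm->ProcessStream(capture_frame);        // Near-end / capture side.
    }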
326 // reverse stream forms the echo reference signal. It is recommended, but not 338 // reverse stream forms the echo reference signal. It is recommended, but not
327 // necessary, to provide if gain control is enabled. On the server-side this 339 // necessary, to provide if gain control is enabled. On the server-side this
328 // typically will not be used. If you're not sure what to pass in here, 340 // typically will not be used. If you're not sure what to pass in here,
329 // chances are you don't need to use it. 341 // chances are you don't need to use it.
330 // 342 //
331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to
333 // |input_sample_rate_hz()| 345 // |input_sample_rate_hz()|
334 // 346 //
335 // TODO(ajm): add const to input; requires an implementation fix. 347 // TODO(ajm): add const to input; requires an implementation fix.
348 // DEPRECATED: Use |ProcessReverseStream| instead.
349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
337 351
352 // Same as |AnalyzeReverseStream|, but may modify |data| if intelligibility
Andrew MacDonald 2015/07/30 18:48:53 modify |frame|
ekm 2015/07/30 22:38:44 Done.
353 // is enabled.
354 virtual int ProcessReverseStream(AudioFrame* frame) = 0;
355
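Given the deprecation above, existing AudioFrame callers migrate by swapping the call; a hedged before/after sketch (the helper and variable names are illustrative):

    // Sketch: same render frame, new entry point. The frame may be modified in
    // place when intelligibility is enabled, so play out the processed samples.
    int ProcessRenderFrame(webrtc::AudioProcessing* apm,
                           webrtc::AudioFrame* render_frame) {
      // Previously: return apm->AnalyzeReverseStream(render_frame);
      return apm->ProcessReverseStream(render_frame);
    }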
338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
339 // of |data| points to a channel buffer, arranged according to |layout|. 357 // of |data| points to a channel buffer, arranged according to |layout|.
340 //
341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
342 virtual int AnalyzeReverseStream(const float* const* data, 359 virtual int AnalyzeReverseStream(const float* const* data,
343 int samples_per_channel, 360 int samples_per_channel,
344 int sample_rate_hz, 361 int rev_sample_rate_hz,
345 ChannelLayout layout) = 0; 362 ChannelLayout layout) = 0;
346 363
347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of 364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
348 // |data| points to a channel buffer, arranged according to |reverse_config|. 365 // |data| points to a channel buffer, arranged according to |reverse_config|.
349 virtual int AnalyzeReverseStream(const float* const* data, 366 virtual int ProcessReverseStream(const float* const* src,
350 const StreamConfig& reverse_config) = 0; 367 const StreamConfig& reverse_input_config,
368 const StreamConfig& reverse_output_config,
Andrew MacDonald 2015/07/30 18:48:53 I'm not sure we want to do this. The way you have
aluebs-webrtc 2015/07/30 18:56:34 You bring a good point here. I think it makes sense
ekm 2015/07/30 21:23:50 Are we talking about converting reverse to capture
aluebs-webrtc 2015/07/30 23:09:51 I would prefer to avoid adding yet another interface
ekm 2015/07/30 23:20:17 Ok. Why are the two configs in the ProcessStream interface?
aluebs-webrtc 2015/07/30 23:23:38 So that the APM knows what the user expects as output.
369 float* const* dest) = 0;
351 370
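A hedged sketch of the new float-path call with distinct reverse input and output formats; the 48 kHz stereo-in / mono-out choice and the helper name are purely illustrative.

    // Sketch: run one 10 ms deinterleaved float render chunk (range [-1, 1])
    // through the reverse path, asking APM for a mono reverse output.
    int ProcessRenderChunk(webrtc::AudioProcessing* apm,
                           const float* const* src, float* const* dest) {
      webrtc::StreamConfig reverse_input(48000, 2);   // Format handed to APM.
      webrtc::StreamConfig reverse_output(48000, 1);  // Format expected back in |dest|.
      return apm->ProcessReverseStream(src, reverse_input, reverse_output, dest);
    }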
352 // This must be called if and only if echo processing is enabled. 371 // This must be called if and only if echo processing is enabled.
353 // 372 //
354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end 373 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
355 // frame and ProcessStream() receiving a near-end frame containing the 374 // frame and ProcessStream() receiving a near-end frame containing the
356 // corresponding echo. On the client-side this can be expressed as 375 // corresponding echo. On the client-side this can be expressed as
357 // delay = (t_render - t_analyze) + (t_process - t_capture) 376 // delay = (t_render - t_analyze) + (t_process - t_capture)
358 // where, 377 // where,
359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and 378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
360 // t_render is the time the first sample of the same frame is rendered by 379 // t_render is the time the first sample of the same frame is rendered by
(...skipping 101 matching lines...)
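Relating to the delay formula in the stream-delay comment just above the elision, a hedged sketch of how a client might compute and report it for each capture frame; the timestamp plumbing and helper name are illustrative, and set_stream_delay_ms() is assumed to be the APM setter this comment documents.

    // Sketch: delay = (t_render - t_analyze) + (t_process - t_capture), in ms,
    // reported before the corresponding ProcessStream() call.
    int ReportStreamDelay(webrtc::AudioProcessing* apm,
                          int64_t t_analyze_ms, int64_t t_render_ms,
                          int64_t t_capture_ms, int64_t t_process_ms) {
      const int delay_ms = static_cast<int>(
          (t_render_ms - t_analyze_ms) + (t_process_ms - t_capture_ms));
      return apm->set_stream_delay_ms(delay_ms);
    }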
462 // num_channels: The number of audio channels in the stream, excluding the 481 // num_channels: The number of audio channels in the stream, excluding the
463 // keyboard channel if it is present. When passing a 482 // keyboard channel if it is present. When passing a
464 // StreamConfig with an array of arrays T*[N], 483 // StreamConfig with an array of arrays T*[N],
465 // 484 //
466 // N == {num_channels + 1 if has_keyboard 485 // N == {num_channels + 1 if has_keyboard
467 // {num_channels if !has_keyboard 486 // {num_channels if !has_keyboard
468 // 487 //
469 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard 488 // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
470 // is true, the last channel in any corresponding list of 489 // is true, the last channel in any corresponding list of
471 // channels is the keyboard channel. 490 // channels is the keyboard channel.
472 StreamConfig(int sample_rate_hz = 0, 491 StreamConfig(int sample_rate_hz = 16000,
473 int num_channels = 0, 492 int num_channels = 1,
aluebs-webrtc 2015/07/30 15:28:07 I think this zero-initialization here is on purpose.
Andrew MacDonald 2015/07/30 18:48:53 Agreed.
ekm 2015/07/30 22:38:44 That makes sense. I thought it'd be a nice way to
474 bool has_keyboard = false) 493 bool has_keyboard = false)
475 : sample_rate_hz_(sample_rate_hz), 494 : sample_rate_hz_(sample_rate_hz),
476 num_channels_(num_channels), 495 num_channels_(num_channels),
477 has_keyboard_(has_keyboard), 496 has_keyboard_(has_keyboard),
478 num_frames_(calculate_frames(sample_rate_hz)) {} 497 num_frames_(calculate_frames(sample_rate_hz)) {}
479 498
480 void set_sample_rate_hz(int value) { 499 void set_sample_rate_hz(int value) {
481 sample_rate_hz_ = value; 500 sample_rate_hz_ = value;
482 num_frames_ = calculate_frames(value); 501 num_frames_ = calculate_frames(value);
483 } 502 }
(...skipping 26 matching lines...)
510 int num_channels_; 529 int num_channels_;
511 bool has_keyboard_; 530 bool has_keyboard_;
512 int num_frames_; 531 int num_frames_;
513 }; 532 };
514 533
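To make the N == num_channels + 1 relationship documented above concrete, a small sketch of the channel-pointer array that goes with a keyboard-carrying StreamConfig; the buffer names and sizes are illustrative.

    // Sketch: 16 kHz stereo capture plus a trailing keyboard channel.
    // num_channels excludes the keyboard channel, so the pointer array has
    // num_channels + 1 entries and the keyboard buffer comes last.
    webrtc::StreamConfig capture_config(16000, 2, true /* has_keyboard */);
    float left[160] = {};   // 10 ms at 16 kHz.
    float right[160] = {};
    float keyboard[160] = {};
    const float* channels[] = {left, right, keyboard};  // N == 2 + 1.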
515 class ProcessingConfig { 534 class ProcessingConfig {
516 public: 535 public:
517 enum StreamName { 536 enum StreamName {
518 kInputStream, 537 kInputStream,
519 kOutputStream, 538 kOutputStream,
520 kReverseStream, 539 kReverseInputStream,
540 kReverseOutputStream,
521 kNumStreamNames, 541 kNumStreamNames,
522 }; 542 };
523 543
524 const StreamConfig& input_stream() const { 544 const StreamConfig& input_stream() const {
525 return streams[StreamName::kInputStream]; 545 return streams[StreamName::kInputStream];
526 } 546 }
527 const StreamConfig& output_stream() const { 547 const StreamConfig& output_stream() const {
528 return streams[StreamName::kOutputStream]; 548 return streams[StreamName::kOutputStream];
529 } 549 }
530 const StreamConfig& reverse_stream() const { 550 const StreamConfig& reverse_input_stream() const {
531 return streams[StreamName::kReverseStream]; 551 return streams[StreamName::kReverseInputStream];
552 }
553 const StreamConfig& reverse_output_stream() const {
554 return streams[StreamName::kReverseOutputStream];
532 } 555 }
533 556
534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } 557 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } 558 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } 559 StreamConfig& reverse_input_stream() {
560 return streams[StreamName::kReverseInputStream];
561 }
562 StreamConfig& reverse_output_stream() {
563 return streams[StreamName::kReverseOutputStream];
564 }
537 565
538 bool operator==(const ProcessingConfig& other) const { 566 bool operator==(const ProcessingConfig& other) const {
539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { 567 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
540 if (this->streams[i] != other.streams[i]) { 568 if (this->streams[i] != other.streams[i]) {
541 return false; 569 return false;
542 } 570 }
543 } 571 }
544 return true; 572 return true;
545 } 573 }
546 574
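A hedged sketch of populating all four streams, e.g. for an explicit re-initialization with distinct reverse input/output formats; the rates and channel counts are illustrative, and Initialize(const ProcessingConfig&) is assumed to be the overload these accessors feed.

    int InitializeFormats(webrtc::AudioProcessing* apm) {
      webrtc::ProcessingConfig processing_config;
      processing_config.input_stream() = webrtc::StreamConfig(48000, 2);
      processing_config.output_stream() = webrtc::StreamConfig(48000, 1);
      processing_config.reverse_input_stream() = webrtc::StreamConfig(44100, 2);
      processing_config.reverse_output_stream() = webrtc::StreamConfig(44100, 1);
      return apm->Initialize(processing_config);
    }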
(...skipping 359 matching lines...)
906 // This does not impact the size of frames passed to |ProcessStream()|. 934 // This does not impact the size of frames passed to |ProcessStream()|.
907 virtual int set_frame_size_ms(int size) = 0; 935 virtual int set_frame_size_ms(int size) = 0;
908 virtual int frame_size_ms() const = 0; 936 virtual int frame_size_ms() const = 0;
909 937
910 protected: 938 protected:
911 virtual ~VoiceDetection() {} 939 virtual ~VoiceDetection() {}
912 }; 940 };
913 } // namespace webrtc 941 } // namespace webrtc
914 942
915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 943 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_