Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(169)

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fix Mac Error (3) Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 Beamforming() 109 Beamforming()
110 : enabled(false), 110 : enabled(false),
111 array_geometry() {} 111 array_geometry() {}
112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry)
113 : enabled(enabled), 113 : enabled(enabled),
114 array_geometry(array_geometry) {} 114 array_geometry(array_geometry) {}
115 const bool enabled; 115 const bool enabled;
116 const std::vector<Point> array_geometry; 116 const std::vector<Point> array_geometry;
117 }; 117 };
118 118
119 // Use to enable intelligibility enhancer in audio processing. Must be provided
120 // through the constructor. It will have no impact if used with
121 // AudioProcessing::SetExtraOptions().
122 //
123 // Note: If enabled and the reverse stream has more than one output channel,
124 // the reverse stream will become an upmixed mono signal.
125 struct Intelligibility {
126 Intelligibility() : enabled(false) {}
127 explicit Intelligibility(bool enabled) : enabled(enabled) {}
128 bool enabled;
129 };
130
119 static const int kAudioProcMaxNativeSampleRateHz = 32000; 131 static const int kAudioProcMaxNativeSampleRateHz = 32000;
120 132
121 // The Audio Processing Module (APM) provides a collection of voice processing 133 // The Audio Processing Module (APM) provides a collection of voice processing
122 // components designed for real-time communications software. 134 // components designed for real-time communications software.
123 // 135 //
124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the
125 // primary stream, on which all processing is applied, are passed to 137 // primary stream, on which all processing is applied, are passed to
126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for
127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the
128 // client-side, this will typically be the near-end (capture) and far-end 140 // client-side, this will typically be the near-end (capture) and far-end
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 // reverse stream forms the echo reference signal. It is recommended, but not 338 // reverse stream forms the echo reference signal. It is recommended, but not
327 // necessary, to provide if gain control is enabled. On the server-side this 339 // necessary, to provide if gain control is enabled. On the server-side this
328 // typically will not be used. If you're not sure what to pass in here, 340 // typically will not be used. If you're not sure what to pass in here,
329 // chances are you don't need to use it. 341 // chances are you don't need to use it.
330 // 342 //
331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to
333 // |input_sample_rate_hz()| 345 // |input_sample_rate_hz()|
334 // 346 //
335 // TODO(ajm): add const to input; requires an implementation fix. 347 // TODO(ajm): add const to input; requires an implementation fix.
348 // DEPRECATED: Use |ProcessReverseStream| instead.
349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
337 351
352 // Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility
353 // is enabled.
354 virtual int ProcessReverseStream(AudioFrame* frame) = 0;
355
338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
339 // of |data| points to a channel buffer, arranged according to |layout|. 357 // of |data| points to a channel buffer, arranged according to |layout|.
340 //
341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
342 virtual int AnalyzeReverseStream(const float* const* data, 359 virtual int AnalyzeReverseStream(const float* const* data,
343 int samples_per_channel, 360 int samples_per_channel,
344 int sample_rate_hz, 361 int rev_sample_rate_hz,
345 ChannelLayout layout) = 0; 362 ChannelLayout layout) = 0;
346 363
347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of 364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
348 // |data| points to a channel buffer, arranged according to |reverse_config|. 365 // |data| points to a channel buffer, arranged according to |reverse_config|.
349 virtual int AnalyzeReverseStream(const float* const* data, 366 virtual int ProcessReverseStream(const float* const* src,
350 const StreamConfig& reverse_config) = 0; 367 const StreamConfig& reverse_input_config,
368 const StreamConfig& reverse_output_config,
369 float* const* dest) = 0;
351 370
352 // This must be called if and only if echo processing is enabled. 371 // This must be called if and only if echo processing is enabled.
353 // 372 //
354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end 373 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
355 // frame and ProcessStream() receiving a near-end frame containing the 374 // frame and ProcessStream() receiving a near-end frame containing the
356 // corresponding echo. On the client-side this can be expressed as 375 // corresponding echo. On the client-side this can be expressed as
357 // delay = (t_render - t_analyze) + (t_process - t_capture) 376 // delay = (t_render - t_analyze) + (t_process - t_capture)
358 // where, 377 // where,
359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and 378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
360 // t_render is the time the first sample of the same frame is rendered by 379 // t_render is the time the first sample of the same frame is rendered by
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
485 void set_has_keyboard(bool value) { has_keyboard_ = value; } 504 void set_has_keyboard(bool value) { has_keyboard_ = value; }
486 505
487 int sample_rate_hz() const { return sample_rate_hz_; } 506 int sample_rate_hz() const { return sample_rate_hz_; }
488 507
489 // The number of channels in the stream, not including the keyboard channel if 508 // The number of channels in the stream, not including the keyboard channel if
490 // present. 509 // present.
491 int num_channels() const { return num_channels_; } 510 int num_channels() const { return num_channels_; }
492 511
493 bool has_keyboard() const { return has_keyboard_; } 512 bool has_keyboard() const { return has_keyboard_; }
494 int num_frames() const { return num_frames_; } 513 int num_frames() const { return num_frames_; }
514 int num_samples() const { return num_channels_ * num_frames_; }
495 515
496 bool operator==(const StreamConfig& other) const { 516 bool operator==(const StreamConfig& other) const {
497 return sample_rate_hz_ == other.sample_rate_hz_ && 517 return sample_rate_hz_ == other.sample_rate_hz_ &&
498 num_channels_ == other.num_channels_ && 518 num_channels_ == other.num_channels_ &&
499 has_keyboard_ == other.has_keyboard_; 519 has_keyboard_ == other.has_keyboard_;
500 } 520 }
501 521
502 bool operator!=(const StreamConfig& other) const { return !(*this == other); } 522 bool operator!=(const StreamConfig& other) const { return !(*this == other); }
503 523
504 private: 524 private:
505 static int calculate_frames(int sample_rate_hz) { 525 static int calculate_frames(int sample_rate_hz) {
506 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000; 526 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
507 } 527 }
508 528
509 int sample_rate_hz_; 529 int sample_rate_hz_;
510 int num_channels_; 530 int num_channels_;
511 bool has_keyboard_; 531 bool has_keyboard_;
512 int num_frames_; 532 int num_frames_;
513 }; 533 };
514 534
515 class ProcessingConfig { 535 class ProcessingConfig {
516 public: 536 public:
517 enum StreamName { 537 enum StreamName {
518 kInputStream, 538 kInputStream,
519 kOutputStream, 539 kOutputStream,
520 kReverseStream, 540 kReverseInputStream,
541 kReverseOutputStream,
521 kNumStreamNames, 542 kNumStreamNames,
522 }; 543 };
523 544
524 const StreamConfig& input_stream() const { 545 const StreamConfig& input_stream() const {
525 return streams[StreamName::kInputStream]; 546 return streams[StreamName::kInputStream];
526 } 547 }
527 const StreamConfig& output_stream() const { 548 const StreamConfig& output_stream() const {
528 return streams[StreamName::kOutputStream]; 549 return streams[StreamName::kOutputStream];
529 } 550 }
530 const StreamConfig& reverse_stream() const { 551 const StreamConfig& reverse_input_stream() const {
531 return streams[StreamName::kReverseStream]; 552 return streams[StreamName::kReverseInputStream];
553 }
554 const StreamConfig& reverse_output_stream() const {
555 return streams[StreamName::kReverseOutputStream];
532 } 556 }
533 557
534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } 558 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } 559 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } 560 StreamConfig& reverse_input_stream() {
561 return streams[StreamName::kReverseInputStream];
562 }
563 StreamConfig& reverse_output_stream() {
564 return streams[StreamName::kReverseOutputStream];
565 }
537 566
538 bool operator==(const ProcessingConfig& other) const { 567 bool operator==(const ProcessingConfig& other) const {
539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { 568 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
540 if (this->streams[i] != other.streams[i]) { 569 if (this->streams[i] != other.streams[i]) {
541 return false; 570 return false;
542 } 571 }
543 } 572 }
544 return true; 573 return true;
545 } 574 }
546 575
(...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after
906 // This does not impact the size of frames passed to |ProcessStream()|. 935 // This does not impact the size of frames passed to |ProcessStream()|.
907 virtual int set_frame_size_ms(int size) = 0; 936 virtual int set_frame_size_ms(int size) = 0;
908 virtual int frame_size_ms() const = 0; 937 virtual int frame_size_ms() const = 0;
909 938
910 protected: 939 protected:
911 virtual ~VoiceDetection() {} 940 virtual ~VoiceDetection() {}
912 }; 941 };
913 } // namespace webrtc 942 } // namespace webrtc
914 943
915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 944 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698