Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(373)

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Added rev conversion for unprocessed case Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
109 Beamforming() 109 Beamforming()
110 : enabled(false), 110 : enabled(false),
111 array_geometry() {} 111 array_geometry() {}
112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry)
113 : enabled(enabled), 113 : enabled(enabled),
114 array_geometry(array_geometry) {} 114 array_geometry(array_geometry) {}
115 const bool enabled; 115 const bool enabled;
116 const std::vector<Point> array_geometry; 116 const std::vector<Point> array_geometry;
117 }; 117 };
118 118
119 // Use to enable intelligibility enhancer in audio processing. Must be provided
120 // through the constructor. It will have no impact if used with
121 // AudioProcessing::SetExtraOptions().
122 //
123 // Note: If enabled and the reverse stream has more than one output channel,
124 // the reverse stream will become an upmixed mono signal.
125 struct Intelligibility {
126 Intelligibility() : enabled(false) {}
127 explicit Intelligibility(bool enabled) : enabled(enabled) {}
128 bool enabled;
129 };
130
119 static const int kAudioProcMaxNativeSampleRateHz = 32000; 131 static const int kAudioProcMaxNativeSampleRateHz = 32000;
120 132
121 // The Audio Processing Module (APM) provides a collection of voice processing 133 // The Audio Processing Module (APM) provides a collection of voice processing
122 // components designed for real-time communications software. 134 // components designed for real-time communications software.
123 // 135 //
124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the
125 // primary stream, on which all processing is applied, are passed to 137 // primary stream, on which all processing is applied, are passed to
126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for
127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the
128 // client-side, this will typically be the near-end (capture) and far-end 140 // client-side, this will typically be the near-end (capture) and far-end
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
326 // reverse stream forms the echo reference signal. It is recommended, but not 338 // reverse stream forms the echo reference signal. It is recommended, but not
327 // necessary, to provide if gain control is enabled. On the server-side this 339 // necessary, to provide if gain control is enabled. On the server-side this
328 // typically will not be used. If you're not sure what to pass in here, 340 // typically will not be used. If you're not sure what to pass in here,
329 // chances are you don't need to use it. 341 // chances are you don't need to use it.
330 // 342 //
331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to
333 // |input_sample_rate_hz()| 345 // |input_sample_rate_hz()|
334 // 346 //
335 // TODO(ajm): add const to input; requires an implementation fix. 347 // TODO(ajm): add const to input; requires an implementation fix.
348 // DEPRECATED: Use |ProcessReverseStream| instead.
349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
337 351
352 // Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility
353 // is enabled.
354 virtual int ProcessReverseStream(AudioFrame* frame) = 0;
355
338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element
339 // of |data| points to a channel buffer, arranged according to |layout|. 357 // of |data| points to a channel buffer, arranged according to |layout|.
340 //
341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface.
342 virtual int AnalyzeReverseStream(const float* const* data, 359 virtual int AnalyzeReverseStream(const float* const* data,
343 int samples_per_channel, 360 int samples_per_channel,
344 int sample_rate_hz, 361 int rev_sample_rate_hz,
345 ChannelLayout layout) = 0; 362 ChannelLayout layout) = 0;
346 363
347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of 364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
348 // |data| points to a channel buffer, arranged according to |reverse_config|. 365 // |data| points to a channel buffer, arranged according to |reverse_config|.
349 virtual int AnalyzeReverseStream(const float* const* data, 366 virtual int ProcessReverseStream(const float* const* src,
350 const StreamConfig& reverse_config) = 0; 367 const StreamConfig& reverse_input_config,
368 const StreamConfig& reverse_output_config,
369 float* const* dest) = 0;
351 370
352 // This must be called if and only if echo processing is enabled. 371 // This must be called if and only if echo processing is enabled.
353 // 372 //
354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end 373 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
355 // frame and ProcessStream() receiving a near-end frame containing the 374 // frame and ProcessStream() receiving a near-end frame containing the
356 // corresponding echo. On the client-side this can be expressed as 375 // corresponding echo. On the client-side this can be expressed as
357 // delay = (t_render - t_analyze) + (t_process - t_capture) 376 // delay = (t_render - t_analyze) + (t_process - t_capture)
358 // where, 377 // where,
359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and 378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and
360 // t_render is the time the first sample of the same frame is rendered by 379 // t_render is the time the first sample of the same frame is rendered by
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 int num_channels_; 529 int num_channels_;
511 bool has_keyboard_; 530 bool has_keyboard_;
512 int num_frames_; 531 int num_frames_;
513 }; 532 };
514 533
515 class ProcessingConfig { 534 class ProcessingConfig {
516 public: 535 public:
517 enum StreamName { 536 enum StreamName {
518 kInputStream, 537 kInputStream,
519 kOutputStream, 538 kOutputStream,
520 kReverseStream, 539 kReverseInputStream,
540 kReverseOutputStream,
521 kNumStreamNames, 541 kNumStreamNames,
522 }; 542 };
523 543
524 const StreamConfig& input_stream() const { 544 const StreamConfig& input_stream() const {
525 return streams[StreamName::kInputStream]; 545 return streams[StreamName::kInputStream];
526 } 546 }
527 const StreamConfig& output_stream() const { 547 const StreamConfig& output_stream() const {
528 return streams[StreamName::kOutputStream]; 548 return streams[StreamName::kOutputStream];
529 } 549 }
530 const StreamConfig& reverse_stream() const { 550 const StreamConfig& reverse_input_stream() const {
531 return streams[StreamName::kReverseStream]; 551 return streams[StreamName::kReverseInputStream];
552 }
553 const StreamConfig& reverse_output_stream() const {
554 return streams[StreamName::kReverseOutputStream];
532 } 555 }
533 556
534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } 557 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } 558 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } 559 StreamConfig& reverse_input_stream() {
560 return streams[StreamName::kReverseInputStream];
561 }
562 StreamConfig& reverse_output_stream() {
563 return streams[StreamName::kReverseOutputStream];
564 }
537 565
538 bool operator==(const ProcessingConfig& other) const { 566 bool operator==(const ProcessingConfig& other) const {
539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { 567 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
540 if (this->streams[i] != other.streams[i]) { 568 if (this->streams[i] != other.streams[i]) {
541 return false; 569 return false;
542 } 570 }
543 } 571 }
544 return true; 572 return true;
545 } 573 }
546 574
(...skipping 359 matching lines...) Expand 10 before | Expand all | Expand 10 after
906 // This does not impact the size of frames passed to |ProcessStream()|. 934 // This does not impact the size of frames passed to |ProcessStream()|.
907 virtual int set_frame_size_ms(int size) = 0; 935 virtual int set_frame_size_ms(int size) = 0;
908 virtual int frame_size_ms() const = 0; 936 virtual int frame_size_ms() const = 0;
909 937
910 protected: 938 protected:
911 virtual ~VoiceDetection() {} 939 virtual ~VoiceDetection() {}
912 }; 940 };
913 } // namespace webrtc 941 } // namespace webrtc
914 942
915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ 943 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698