| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 98 matching lines...) |
| 109 Beamforming() | 109 Beamforming() |
| 110 : enabled(false), | 110 : enabled(false), |
| 111 array_geometry() {} | 111 array_geometry() {} |
| 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) | 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) |
| 113 : enabled(enabled), | 113 : enabled(enabled), |
| 114 array_geometry(array_geometry) {} | 114 array_geometry(array_geometry) {} |
| 115 const bool enabled; | 115 const bool enabled; |
| 116 const std::vector<Point> array_geometry; | 116 const std::vector<Point> array_geometry; |
| 117 }; | 117 }; |
| 118 | 118 |
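For context, a minimal sketch of how the Beamforming option above could be supplied at construction time, assuming the Point(x, y, z) constructor declared earlier in this header; the two-microphone geometry is purely illustrative:

    // Hypothetical two-mic linear array, coordinates in meters.
    std::vector<webrtc::Point> geometry;
    geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));
    geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

    webrtc::Config config;
    config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
    webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);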
| 119 // Use to enable the intelligibility enhancer in audio processing. Must be provided |
| 120 // through the constructor. It will have no impact if used with |
| 121 // AudioProcessing::SetExtraOptions(). |
| 122 // |
| 123 // Note: If enabled and the reverse stream has more than one output channel, |
| 124 // the reverse stream will become an upmixed mono signal. |
| 125 struct Intelligibility { |
| 126 Intelligibility() : enabled(false) {} |
| 127 explicit Intelligibility(bool enabled) : enabled(enabled) {} |
| 128 bool enabled; |
| 129 }; |
| 130 |
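The new Intelligibility option follows the same Config pattern; a minimal sketch, noting that it only takes effect when passed at construction time:

    webrtc::Config config;
    config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));
    webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);
    // Setting it later via apm->SetExtraOptions(config) would have no effect,
    // per the comment above.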
| 119 static const int kAudioProcMaxNativeSampleRateHz = 32000; | 131 static const int kAudioProcMaxNativeSampleRateHz = 32000; |
| 120 | 132 |
| 121 // The Audio Processing Module (APM) provides a collection of voice processing | 133 // The Audio Processing Module (APM) provides a collection of voice processing |
| 122 // components designed for real-time communications software. | 134 // components designed for real-time communications software. |
| 123 // | 135 // |
| 124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the | 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the |
| 125 // primary stream, on which all processing is applied, are passed to | 137 // primary stream, on which all processing is applied, are passed to |
| 126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for | 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for |
| 127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the | 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the |
| 128 // client-side, this will typically be the near-end (capture) and far-end | 140 // client-side, this will typically be the near-end (capture) and far-end |
| (...skipping 197 matching lines...) |
| 326 // reverse stream forms the echo reference signal. It is recommended, but not | 338 // reverse stream forms the echo reference signal. It is recommended, but not |
| 327 // necessary, to provide it if gain control is enabled. On the server-side this | 339 // necessary, to provide it if gain control is enabled. On the server-side this |
| 328 // typically will not be used. If you're not sure what to pass in here, | 340 // typically will not be used. If you're not sure what to pass in here, |
| 329 // chances are you don't need to use it. | 341 // chances are you don't need to use it. |
| 330 // | 342 // |
| 331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| |
| 332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to |
| 333 // |input_sample_rate_hz()|. | 345 // |input_sample_rate_hz()|. |
| 334 // | 346 // |
| 335 // TODO(ajm): add const to input; requires an implementation fix. | 347 // TODO(ajm): add const to input; requires an implementation fix. |
| 348 // DEPRECATED: Use |ProcessReverseStream| instead. |
| 349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. |
| 336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; |
| 337 | 351 |
| 352 // Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility |
| 353 // is enabled. |
| 354 virtual int ProcessReverseStream(AudioFrame* frame) = 0; |
| 355 |
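A rough sketch of how a caller on the far-end path might move to the new call, assuming |apm| is an initialized AudioProcessing* and |render_frame| is a hypothetical AudioFrame filled by the render pipeline:

    // Previously: apm->AnalyzeReverseStream(&render_frame);
    // Now the frame may be modified in place (e.g. when the intelligibility
    // enhancer is enabled), so it should be processed before playout.
    int err = apm->ProcessReverseStream(&render_frame);
    if (err != webrtc::AudioProcessing::kNoError) {
      // Handle or log the error.
    }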
| 338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element |
| 339 // of |data| points to a channel buffer, arranged according to |layout|. | 357 // of |data| points to a channel buffer, arranged according to |layout|. |
| 340 // | |
| 341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface. |
| 342 virtual int AnalyzeReverseStream(const float* const* data, | 359 virtual int AnalyzeReverseStream(const float* const* data, |
| 343 int samples_per_channel, | 360 int samples_per_channel, |
| 344 int sample_rate_hz, | 361 int rev_sample_rate_hz, |
| 345 ChannelLayout layout) = 0; | 362 ChannelLayout layout) = 0; |
| 346 | 363 |
| 347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | 364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of |
| 348 // |data| points to a channel buffer, arranged according to |reverse_config|. | 365 // |data| points to a channel buffer, arranged according to |reverse_config|. |
| 349 virtual int AnalyzeReverseStream(const float* const* data, | 366 virtual int ProcessReverseStream(const float* const* src, |
| 350 const StreamConfig& reverse_config) = 0; | 367 const StreamConfig& reverse_input_config, |
| 368 const StreamConfig& reverse_output_config, |
| 369 float* const* dest) = 0; |
| 351 | 370 |
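A hedged sketch of the new float overload, assuming StreamConfig's (sample_rate_hz, num_channels) constructor from this header; |apm|, |src| and |dest| are placeholders, and the 48 kHz stereo configuration is illustrative:

    // |src| and |dest| each hold one float* per channel; samples are in [-1, 1].
    webrtc::StreamConfig reverse_input_config(48000, 2);
    webrtc::StreamConfig reverse_output_config(48000, 2);
    int err = apm->ProcessReverseStream(src, reverse_input_config,
                                        reverse_output_config, dest);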
| 352 // This must be called if and only if echo processing is enabled. | 371 // This must be called if and only if echo processing is enabled. |
| 353 // | 372 // |
| 354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 373 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end |
| 355 // frame and ProcessStream() receiving a near-end frame containing the | 374 // frame and ProcessStream() receiving a near-end frame containing the |
| 356 // corresponding echo. On the client-side this can be expressed as | 375 // corresponding echo. On the client-side this can be expressed as |
| 357 // delay = (t_render - t_analyze) + (t_process - t_capture) | 376 // delay = (t_render - t_analyze) + (t_process - t_capture) |
| 358 // where, | 377 // where, |
| 359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and |
| 360 // t_render is the time the first sample of the same frame is rendered by | 379 // t_render is the time the first sample of the same frame is rendered by |
| (...skipping 149 matching lines...) |
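To make the delay formula above concrete with made-up numbers: if roughly 40 ms elapse between AnalyzeReverseStream() and render, and 30 ms between capture and ProcessStream(), the reported delay would be 70 ms:

    // delay = (t_render - t_analyze) + (t_process - t_capture)
    //       = 40 ms + 30 ms = 70 ms (illustrative values only).
    apm->set_stream_delay_ms(70);  // Set prior to the ProcessStream() call.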
| 510 int num_channels_; | 529 int num_channels_; |
| 511 bool has_keyboard_; | 530 bool has_keyboard_; |
| 512 int num_frames_; | 531 int num_frames_; |
| 513 }; | 532 }; |
| 514 | 533 |
| 515 class ProcessingConfig { | 534 class ProcessingConfig { |
| 516 public: | 535 public: |
| 517 enum StreamName { | 536 enum StreamName { |
| 518 kInputStream, | 537 kInputStream, |
| 519 kOutputStream, | 538 kOutputStream, |
| 520 kReverseStream, | 539 kReverseInputStream, |
| 540 kReverseOutputStream, |
| 521 kNumStreamNames, | 541 kNumStreamNames, |
| 522 }; | 542 }; |
| 523 | 543 |
| 524 const StreamConfig& input_stream() const { | 544 const StreamConfig& input_stream() const { |
| 525 return streams[StreamName::kInputStream]; | 545 return streams[StreamName::kInputStream]; |
| 526 } | 546 } |
| 527 const StreamConfig& output_stream() const { | 547 const StreamConfig& output_stream() const { |
| 528 return streams[StreamName::kOutputStream]; | 548 return streams[StreamName::kOutputStream]; |
| 529 } | 549 } |
| 530 const StreamConfig& reverse_stream() const { | 550 const StreamConfig& reverse_input_stream() const { |
| 531 return streams[StreamName::kReverseStream]; | 551 return streams[StreamName::kReverseInputStream]; |
| 552 } |
| 553 const StreamConfig& reverse_output_stream() const { |
| 554 return streams[StreamName::kReverseOutputStream]; |
| 532 } | 555 } |
| 533 | 556 |
| 534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } | 557 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } |
| 535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } | 558 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } |
| 536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; } | 559 StreamConfig& reverse_input_stream() { |
| 560 return streams[StreamName::kReverseInputStream]; |
| 561 } |
| 562 StreamConfig& reverse_output_stream() { |
| 563 return streams[StreamName::kReverseOutputStream]; |
| 564 } |
| 537 | 565 |
| 538 bool operator==(const ProcessingConfig& other) const { | 566 bool operator==(const ProcessingConfig& other) const { |
| 539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { | 567 for (int i = 0; i < StreamName::kNumStreamNames; ++i) { |
| 540 if (this->streams[i] != other.streams[i]) { | 568 if (this->streams[i] != other.streams[i]) { |
| 541 return false; | 569 return false; |
| 542 } | 570 } |
| 543 } | 571 } |
| 544 return true; | 572 return true; |
| 545 } | 573 } |
| 546 | 574 |
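A small sketch of the renamed accessors in use, assuming the ProcessingConfig-based Initialize() overload elsewhere in this header; the rates and channel counts are illustrative:

    webrtc::ProcessingConfig processing_config;
    processing_config.input_stream() = webrtc::StreamConfig(48000, 1);
    processing_config.output_stream() = webrtc::StreamConfig(48000, 1);
    processing_config.reverse_input_stream() = webrtc::StreamConfig(48000, 2);
    processing_config.reverse_output_stream() = webrtc::StreamConfig(48000, 2);
    int err = apm->Initialize(processing_config);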
| (...skipping 359 matching lines...) |
| 906 // This does not impact the size of frames passed to |ProcessStream()|. | 934 // This does not impact the size of frames passed to |ProcessStream()|. |
| 907 virtual int set_frame_size_ms(int size) = 0; | 935 virtual int set_frame_size_ms(int size) = 0; |
| 908 virtual int frame_size_ms() const = 0; | 936 virtual int frame_size_ms() const = 0; |
| 909 | 937 |
| 910 protected: | 938 protected: |
| 911 virtual ~VoiceDetection() {} | 939 virtual ~VoiceDetection() {} |
| 912 }; | 940 }; |
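For completeness, a hedged sketch of the VoiceDetection knobs mentioned above, assuming |apm| is an initialized AudioProcessing*; the 10 ms value is just an example:

    webrtc::VoiceDetection* vad = apm->voice_detection();
    vad->Enable(true);
    vad->set_frame_size_ms(10);  // Internal VAD frame size only; does not change
                                 // the frame size passed to ProcessStream().
    // After a ProcessStream() call:
    bool voice_detected = apm->voice_detection()->stream_has_voice();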
| 913 } // namespace webrtc | 941 } // namespace webrtc |
| 914 | 942 |
| 915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 943 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ |