webrtc/modules/audio_processing/include/audio_processing.h - Issue 1234463003: Integrate Intelligibility with APM

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Fix Mac Error (3) Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/include/mock_audio_processing.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 98 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
109 Beamforming()	109 Beamforming()

110 : enabled(false),	110 : enabled(false),

111 array_geometry() {}	111 array_geometry() {}

112 Beamforming(bool enabled, const std::vector<Point>& array_geometry)	112 Beamforming(bool enabled, const std::vector<Point>& array_geometry)

113 : enabled(enabled),	113 : enabled(enabled),

114 array_geometry(array_geometry) {}	114 array_geometry(array_geometry) {}

115 const bool enabled;	115 const bool enabled;

116 const std::vector<Point> array_geometry;	116 const std::vector<Point> array_geometry;

117 };	117 };

118	118

	119 // Use to enable intelligibility enhancer in audio processing. Must be provided

	120 // through the constructor. It will have no impact if used with

	121 // AudioProcessing::SetExtraOptions().

	122 //

	123 // Note: If enabled and the reverse stream has more than one output channel,

	124 // the reverse stream will become an upmixed mono signal.

	125 struct Intelligibility {

	126 Intelligibility() : enabled(false) {}

	127 explicit Intelligibility(bool enabled) : enabled(enabled) {}

	128 bool enabled;

	129 };

	130

119 static const int kAudioProcMaxNativeSampleRateHz = 32000;	131 static const int kAudioProcMaxNativeSampleRateHz = 32000;

120	132

121 // The Audio Processing Module (APM) provides a collection of voice processing	133 // The Audio Processing Module (APM) provides a collection of voice processing

122 // components designed for real-time communications software.	134 // components designed for real-time communications software.

123 //	135 //

124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the	136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the

125 // primary stream, on which all processing is applied, are passed to	137 // primary stream, on which all processing is applied, are passed to

126 // \|ProcessStream()\|. Frames of the reverse direction stream, which are used for	138 // \|ProcessStream()\|. Frames of the reverse direction stream, which are used for

127 // analysis by some components, are passed to \|AnalyzeReverseStream()\|. On the	139 // analysis by some components, are passed to \|AnalyzeReverseStream()\|. On the

128 // client-side, this will typically be the near-end (capture) and far-end	140 // client-side, this will typically be the near-end (capture) and far-end

(...skipping 197 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
326 // reverse stream forms the echo reference signal. It is recommended, but not	338 // reverse stream forms the echo reference signal. It is recommended, but not

327 // necessary, to provide if gain control is enabled. On the server-side this	339 // necessary, to provide if gain control is enabled. On the server-side this

328 // typically will not be used. If you're not sure what to pass in here,	340 // typically will not be used. If you're not sure what to pass in here,

329 // chances are you don't need to use it.	341 // chances are you don't need to use it.

330 //	342 //

331 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|	343 // The \|sample_rate_hz_\|, \|num_channels_\|, and \|samples_per_channel_\|

332 // members of \|frame\| must be valid. \|sample_rate_hz_\| must correspond to	344 // members of \|frame\| must be valid. \|sample_rate_hz_\| must correspond to

333 // \|input_sample_rate_hz()\|	345 // \|input_sample_rate_hz()\|

334 //	346 //

335 // TODO(ajm): add const to input; requires an implementation fix.	347 // TODO(ajm): add const to input; requires an implementation fix.

	348 // DEPRECATED: Use \|ProcessReverseStream\| instead.

	349 // TODO(ekm): Remove once all users have updated to \|ProcessReverseStream\|.

336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;	350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;

337	351

	352 // Same as \|AnalyzeReverseStream\|, but may modify \|frame\| if intelligibility

	353 // is enabled.

	354 virtual int ProcessReverseStream(AudioFrame* frame) = 0;

	355

338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element	356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element

339 // of \|data\| points to a channel buffer, arranged according to \|layout\|.	357 // of \|data\| points to a channel buffer, arranged according to \|layout\|.

340 //

341 // TODO(mgraczyk): Remove once clients are updated to use the new interface.	358 // TODO(mgraczyk): Remove once clients are updated to use the new interface.

342 virtual int AnalyzeReverseStream(const float* const* data,	359 virtual int AnalyzeReverseStream(const float* const* data,

343 int samples_per_channel,	360 int samples_per_channel,

344 int sample_rate_hz,	361 int rev_sample_rate_hz,

345 ChannelLayout layout) = 0;	362 ChannelLayout layout) = 0;

346	363

347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of	364 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of

348 // \|data\| points to a channel buffer, arranged according to \|reverse_config\|.	365 // \|src\| points to a channel buffer, arranged according to \|reverse_input_config\|.

349 virtual int AnalyzeReverseStream(const float* const* data,	366 virtual int ProcessReverseStream(const float* const* src,

350 const StreamConfig& reverse_config) = 0;	367 const StreamConfig& reverse_input_config,

	368 const StreamConfig& reverse_output_config,

	369 float* const* dest) = 0;

351	370

352 // This must be called if and only if echo processing is enabled.	371 // This must be called if and only if echo processing is enabled.

353 //	372 //

354 // Sets the \|delay\| in ms between AnalyzeReverseStream() receiving a far-end	373 // Sets the \|delay\| in ms between AnalyzeReverseStream() receiving a far-end

355 // frame and ProcessStream() receiving a near-end frame containing the	374 // frame and ProcessStream() receiving a near-end frame containing the

356 // corresponding echo. On the client-side this can be expressed as	375 // corresponding echo. On the client-side this can be expressed as

357 // delay = (t_render - t_analyze) + (t_process - t_capture)	376 // delay = (t_render - t_analyze) + (t_process - t_capture)

358 // where,	377 // where,

359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and	378 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and

360 // t_render is the time the first sample of the same frame is rendered by	379 // t_render is the time the first sample of the same frame is rendered by

(...skipping 124 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
485 void set_has_keyboard(bool value) { has_keyboard_ = value; }	504 void set_has_keyboard(bool value) { has_keyboard_ = value; }

486	505

487 int sample_rate_hz() const { return sample_rate_hz_; }	506 int sample_rate_hz() const { return sample_rate_hz_; }

488	507

489 // The number of channels in the stream, not including the keyboard channel if	508 // The number of channels in the stream, not including the keyboard channel if

490 // present.	509 // present.

491 int num_channels() const { return num_channels_; }	510 int num_channels() const { return num_channels_; }

492	511

493 bool has_keyboard() const { return has_keyboard_; }	512 bool has_keyboard() const { return has_keyboard_; }

494 int num_frames() const { return num_frames_; }	513 int num_frames() const { return num_frames_; }

	514 int num_samples() const { return num_channels_ * num_frames_; }

495	515

496 bool operator==(const StreamConfig& other) const {	516 bool operator==(const StreamConfig& other) const {

497 return sample_rate_hz_ == other.sample_rate_hz_ &&	517 return sample_rate_hz_ == other.sample_rate_hz_ &&

498 num_channels_ == other.num_channels_ &&	518 num_channels_ == other.num_channels_ &&

499 has_keyboard_ == other.has_keyboard_;	519 has_keyboard_ == other.has_keyboard_;

500 }	520 }

501	521

502 bool operator!=(const StreamConfig& other) const { return !(*this == other); }	522 bool operator!=(const StreamConfig& other) const { return !(*this == other); }

503	523

504 private:	524 private:

505 static int calculate_frames(int sample_rate_hz) {	525 static int calculate_frames(int sample_rate_hz) {

506 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;	526 return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;

507 }	527 }

508	528

509 int sample_rate_hz_;	529 int sample_rate_hz_;

510 int num_channels_;	530 int num_channels_;

511 bool has_keyboard_;	531 bool has_keyboard_;

512 int num_frames_;	532 int num_frames_;

513 };	533 };

514	534

515 class ProcessingConfig {	535 class ProcessingConfig {

516 public:	536 public:

517 enum StreamName {	537 enum StreamName {

518 kInputStream,	538 kInputStream,

519 kOutputStream,	539 kOutputStream,

520 kReverseStream,	540 kReverseInputStream,

	541 kReverseOutputStream,

521 kNumStreamNames,	542 kNumStreamNames,

522 };	543 };

523	544

524 const StreamConfig& input_stream() const {	545 const StreamConfig& input_stream() const {

525 return streams[StreamName::kInputStream];	546 return streams[StreamName::kInputStream];

526 }	547 }

527 const StreamConfig& output_stream() const {	548 const StreamConfig& output_stream() const {

528 return streams[StreamName::kOutputStream];	549 return streams[StreamName::kOutputStream];

529 }	550 }

530 const StreamConfig& reverse_stream() const {	551 const StreamConfig& reverse_input_stream() const {

531 return streams[StreamName::kReverseStream];	552 return streams[StreamName::kReverseInputStream];

	553 }

	554 const StreamConfig& reverse_output_stream() const {

	555 return streams[StreamName::kReverseOutputStream];

532 }	556 }

533	557

534 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }	558 StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }

535 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }	559 StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }

536 StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }	560 StreamConfig& reverse_input_stream() {

	561 return streams[StreamName::kReverseInputStream];

	562 }

	563 StreamConfig& reverse_output_stream() {

	564 return streams[StreamName::kReverseOutputStream];

	565 }

537	566

538 bool operator==(const ProcessingConfig& other) const {	567 bool operator==(const ProcessingConfig& other) const {

539 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {	568 for (int i = 0; i < StreamName::kNumStreamNames; ++i) {

540 if (this->streams[i] != other.streams[i]) {	569 if (this->streams[i] != other.streams[i]) {

541 return false;	570 return false;

542 }	571 }

543 }	572 }

544 return true;	573 return true;

545 }	574 }

546	575

(...skipping 359 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
906 // This does not impact the size of frames passed to \|ProcessStream()\|.	935 // This does not impact the size of frames passed to \|ProcessStream()\|.

907 virtual int set_frame_size_ms(int size) = 0;	936 virtual int set_frame_size_ms(int size) = 0;

908 virtual int frame_size_ms() const = 0;	937 virtual int frame_size_ms() const = 0;

909	938

910 protected:	939 protected:

911 virtual ~VoiceDetection() {}	940 virtual ~VoiceDetection() {}

912 };	941 };

913 } // namespace webrtc	942 } // namespace webrtc

914	943

915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_	944 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

OLD	NEW