Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 
| 3 * | 3 * | 
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license | 
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source | 
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found | 
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may | 
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. | 
| 9 */ | 9 */ | 
| 10 | 10 | 
| (...skipping 98 matching lines...) | |
| 109 Beamforming() | 109 Beamforming() | 
| 110 : enabled(false), | 110 : enabled(false), | 
| 111 array_geometry() {} | 111 array_geometry() {} | 
| 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) | 112 Beamforming(bool enabled, const std::vector<Point>& array_geometry) | 
| 113 : enabled(enabled), | 113 : enabled(enabled), | 
| 114 array_geometry(array_geometry) {} | 114 array_geometry(array_geometry) {} | 
| 115 const bool enabled; | 115 const bool enabled; | 
| 116 const std::vector<Point> array_geometry; | 116 const std::vector<Point> array_geometry; | 
| 117 }; | 117 }; | 
| 118 | 118 | 
| 119 // Use to enable intelligibility enhancer in audio processing. Must be provided | |
| 120 // through the constructor. It will have no impact if used with | |
| 121 // AudioProcessing::SetExtraOptions(). | |
| 122 // | |
| 123 // Note: If enabled and the reverse stream has more than one output channel, | |
| 124 // the reverse stream will become an upmixed mono signal. | |
| 125 struct Intelligibility { | |
| 126 Intelligibility() : enabled(false) {} | |
| 127 explicit Intelligibility(bool enabled) : enabled(enabled) {} | |
| 128 bool enabled; | |
| 129 }; | |
| 130 | |
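The Beamforming and Intelligibility structs above are constructor-time options. Below is a minimal sketch of enabling them, assuming the webrtc::Config::Set<>() mechanism used for other constructor-provided options, a Point(x, y, z) constructor, and the webrtc/common.h include path; the two-microphone geometry is likewise an illustrative assumption, not part of this change.

```cpp
#include <vector>

#include "webrtc/common.h"  // webrtc::Config (assumed path).
#include "webrtc/modules/audio_processing/include/audio_processing.h"

int main() {
  // Assumed two-microphone linear array, coordinates in meters.
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

  webrtc::Config config;
  // Config takes ownership of the option objects passed to Set<>().
  config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
  config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));

  // Per the Intelligibility comment above, the option only takes effect when
  // provided at construction time, not via SetExtraOptions().
  webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);
  int ok = (apm != nullptr) ? 0 : 1;
  delete apm;
  return ok;
}
```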
| 119 static const int kAudioProcMaxNativeSampleRateHz = 32000; | 131 static const int kAudioProcMaxNativeSampleRateHz = 32000; | 
| 120 | 132 | 
| 121 // The Audio Processing Module (APM) provides a collection of voice processing | 133 // The Audio Processing Module (APM) provides a collection of voice processing | 
| 122 // components designed for real-time communications software. | 134 // components designed for real-time communications software. | 
| 123 // | 135 // | 
| 124 // APM operates on two audio streams on a frame-by-frame basis. Frames of the | 136 // APM operates on two audio streams on a frame-by-frame basis. Frames of the | 
| 125 // primary stream, on which all processing is applied, are passed to | 137 // primary stream, on which all processing is applied, are passed to | 
| 126 // |ProcessStream()|. Frames of the reverse direction stream, which are used for | 138 // |ProcessStream()|. Frames of the reverse direction stream, which are used for | 
| 127 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the | 139 // analysis by some components, are passed to |AnalyzeReverseStream()|. On the | 
| 128 // client-side, this will typically be the near-end (capture) and far-end | 140 // client-side, this will typically be the near-end (capture) and far-end | 
| (...skipping 197 matching lines...) | |
| 326 // reverse stream forms the echo reference signal. It is recommended, but not | 338 // reverse stream forms the echo reference signal. It is recommended, but not | 
| 327 // necessary, to provide if gain control is enabled. On the server-side this | 339 // necessary, to provide if gain control is enabled. On the server-side this | 
| 328 // typically will not be used. If you're not sure what to pass in here, | 340 // typically will not be used. If you're not sure what to pass in here, | 
| 329 // chances are you don't need to use it. | 341 // chances are you don't need to use it. | 
| 330 // | 342 // | 
| 331 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 343 // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| | 
| 332 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 344 // members of |frame| must be valid. |sample_rate_hz_| must correspond to | 
| 333 // |input_sample_rate_hz()| | 345 // |input_sample_rate_hz()| | 
| 334 // | 346 // | 
| 335 // TODO(ajm): add const to input; requires an implementation fix. | 347 // TODO(ajm): add const to input; requires an implementation fix. | 
| 348 // DEPRECATED: Use |ProcessReverseStream| instead. | |
| 349 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. | |
| 336 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 350 virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; | 
| 337 | 351 | 
| 352 // Same as |AnalyzeReverseStream|, but may modify |data| if intelligibility | |
| 353 // is enabled. | |
| 354 virtual int ProcessReverseStream(AudioFrame* frame) = 0; | |
| 355 | |
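As a migration sketch for the AudioFrame path: the deprecated AnalyzeReverseStream(AudioFrame*) is replaced one-for-one by ProcessReverseStream(AudioFrame*), the difference being that the latter may rewrite the frame in place when the intelligibility enhancer is enabled. The include path for AudioFrame, the helper name, and the 32 kHz stereo framing are assumptions for illustration.

```cpp
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/interface/module_common_types.h"  // webrtc::AudioFrame (assumed path).

// Feeds one 10 ms render (far-end) frame to APM.
int FeedRenderFrame(webrtc::AudioProcessing* apm, webrtc::AudioFrame* frame) {
  // The frame's metadata must be valid before the call; |sample_rate_hz_|
  // must correspond to input_sample_rate_hz(). 32 kHz is assumed here.
  frame->sample_rate_hz_ = 32000;
  frame->num_channels_ = 2;
  frame->samples_per_channel_ = 320;  // 10 ms at 32 kHz.
  // Interleaved int16 samples are assumed to already be in frame->data_.

  // Deprecated, analysis-only call:
  //   return apm->AnalyzeReverseStream(frame);
  // New call: same analysis, but |frame| may be modified in place (per the
  // Intelligibility note, a multi-channel reverse stream becomes mono).
  return apm->ProcessReverseStream(frame);
}
```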
| 338 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 356 // Accepts deinterleaved float audio with the range [-1, 1]. Each element | 
| 339 // of |data| points to a channel buffer, arranged according to |layout|. | 357 // of |data| points to a channel buffer, arranged according to |layout|. | 
| 358 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | |
| 340 // | 359 // | 
| 341 // TODO(mgraczyk): Remove once clients are updated to use the new interface. | 360 // DEPRECATED: Use |ProcessReverseStream| instead. | 
| 361 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. | |
| 342 virtual int AnalyzeReverseStream(const float* const* data, | 362 virtual int AnalyzeReverseStream(const float* const* data, | 
| 343 int samples_per_channel, | 363 int samples_per_channel, | 
| 344 int sample_rate_hz, | 364 int rev_sample_rate_hz, | 
| 365 ChannelLayout layout) = 0; | |
| 366 | |
| 367 virtual int ProcessReverseStream(float* const* data, | |
Andrew MacDonald, 2015/07/30 03:53:17:
You don't need this one now...

ekm, 2015/07/30 06:15:19:
Done. Removed.
| 368 int samples_per_channel, | |
| 369 int rev_sample_rate_hz, | |
| 345 ChannelLayout layout) = 0; | 370 ChannelLayout layout) = 0; | 
| 346 | 371 | 
| 347 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | 372 // Accepts deinterleaved float audio with the range [-1, 1]. Each element of | 
| 348 // |data| points to a channel buffer, arranged according to |reverse_config|. | 373 // |data| points to a channel buffer, arranged according to |reverse_config|. | 
| 374 // DEPRECATED: Use |ProcessReverseStream| instead. | |
| 375 // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. | |
| 349 virtual int AnalyzeReverseStream(const float* const* data, | 376 virtual int AnalyzeReverseStream(const float* const* data, | 
Andrew MacDonald, 2015/07/30 03:53:17:
I don't think any clients are using this yet besid

ekm, 2015/07/30 06:15:19:
Done. Since we're still supporting AnalyzeReverseS
| 350 const StreamConfig& reverse_config) = 0; | 377 const StreamConfig& reverse_config) = 0; | 
| 351 | 378 | 
| 379 virtual int ProcessReverseStream(float* const* data, | |
| 380 const StreamConfig& reverse_config) = 0; | |
| 381 | |
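For the deinterleaved float path the same migration applies via the StreamConfig overloads. A sketch, assuming StreamConfig can be constructed from a sample rate and channel count; the helper name and 10 ms framing are illustrative.

```cpp
#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Feeds one 10 ms chunk of deinterleaved float render audio (range [-1, 1]).
// |channels| holds |num_channels| pointers, one per channel buffer.
int FeedRenderFloat(webrtc::AudioProcessing* apm,
                    float* const* channels,
                    int sample_rate_hz,
                    int num_channels) {
  webrtc::StreamConfig reverse_config(sample_rate_hz, num_channels);
  // Deprecated, analysis-only path:
  //   return apm->AnalyzeReverseStream(channels, reverse_config);
  // New path; the channel buffers may be rewritten in place when the
  // intelligibility enhancer is enabled.
  return apm->ProcessReverseStream(channels, reverse_config);
}
```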
| 352 // This must be called if and only if echo processing is enabled. | 382 // This must be called if and only if echo processing is enabled. | 
| 353 // | 383 // | 
| 354 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 384 // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end | 
| 355 // frame and ProcessStream() receiving a near-end frame containing the | 385 // frame and ProcessStream() receiving a near-end frame containing the | 
| 356 // corresponding echo. On the client-side this can be expressed as | 386 // corresponding echo. On the client-side this can be expressed as | 
| 357 // delay = (t_render - t_analyze) + (t_process - t_capture) | 387 // delay = (t_render - t_analyze) + (t_process - t_capture) | 
| 358 // where, | 388 // where, | 
| 359 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 389 // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and | 
| 360 // t_render is the time the first sample of the same frame is rendered by | 390 // t_render is the time the first sample of the same frame is rendered by | 
| 361 // the audio hardware. | 391 // the audio hardware. | 
| (...skipping 544 matching lines...) | |
| 906 // This does not impact the size of frames passed to |ProcessStream()|. | 936 // This does not impact the size of frames passed to |ProcessStream()|. | 
| 907 virtual int set_frame_size_ms(int size) = 0; | 937 virtual int set_frame_size_ms(int size) = 0; | 
| 908 virtual int frame_size_ms() const = 0; | 938 virtual int frame_size_ms() const = 0; | 
| 909 | 939 | 
| 910 protected: | 940 protected: | 
| 911 virtual ~VoiceDetection() {} | 941 virtual ~VoiceDetection() {} | 
| 912 }; | 942 }; | 
| 913 } // namespace webrtc | 943 } // namespace webrtc | 
| 914 | 944 | 
| 915 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 945 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ | 