webrtc/modules/audio_processing/include/audio_processing.h - Issue 2292863002: Introduced new scheme for controlling the functionality inside the audio processing module

Side by Side Diff: webrtc/modules/audio_processing/include/audio_processing.h

Issue 2292863002: Introduced new scheme for controlling the functionality inside the audio processing module (Closed)

Patch Set: Changes in response to reviewer comments Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/audio_processing_impl.cc ('K') | « webrtc/modules/audio_processing/echo_cancellation_impl.cc ('k') | webrtc/modules/audio_processing/include/mock_audio_processing.h » ('j') | webrtc/modules/audio_processing/include/mock_audio_processing.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
84 // EchoControlMobile. It can be set in the constructor	84 // EchoControlMobile. It can be set in the constructor

85 // or using AudioProcessing::SetExtraOptions().	85 // or using AudioProcessing::SetExtraOptions().

86 struct RefinedAdaptiveFilter {	86 struct RefinedAdaptiveFilter {

87 RefinedAdaptiveFilter() : enabled(false) {}	87 RefinedAdaptiveFilter() : enabled(false) {}

88 explicit RefinedAdaptiveFilter(bool enabled) : enabled(enabled) {}	88 explicit RefinedAdaptiveFilter(bool enabled) : enabled(enabled) {}

89 static const ConfigOptionID identifier =	89 static const ConfigOptionID identifier =

90 ConfigOptionID::kAecRefinedAdaptiveFilter;	90 ConfigOptionID::kAecRefinedAdaptiveFilter;

91 bool enabled;	91 bool enabled;

92 };	92 };

93	93

94 // Enables the adaptive level controller.

95 struct LevelControl {

96 LevelControl() : enabled(false) {}

97 explicit LevelControl(bool enabled) : enabled(enabled) {}

98 static const ConfigOptionID identifier = ConfigOptionID::kLevelControl;

99 bool enabled;

100 };

101

102 // Enables delay-agnostic echo cancellation. This feature relies on internally	94 // Enables delay-agnostic echo cancellation. This feature relies on internally

103 // estimated delays between the process and reverse streams, thus not relying	95 // estimated delays between the process and reverse streams, thus not relying

104 // on reported system delays. This configuration only applies to	96 // on reported system delays. This configuration only applies to

105 // EchoCancellation and not EchoControlMobile. It can be set in the constructor	97 // EchoCancellation and not EchoControlMobile. It can be set in the constructor

106 // or using AudioProcessing::SetExtraOptions().	98 // or using AudioProcessing::SetExtraOptions().

107 struct DelayAgnostic {	99 struct DelayAgnostic {

108 DelayAgnostic() : enabled(false) {}	100 DelayAgnostic() : enabled(false) {}

109 explicit DelayAgnostic(bool enabled) : enabled(enabled) {}	101 explicit DelayAgnostic(bool enabled) : enabled(enabled) {}

110 static const ConfigOptionID identifier = ConfigOptionID::kDelayAgnostic;	102 static const ConfigOptionID identifier = ConfigOptionID::kDelayAgnostic;

111 bool enabled;	103 bool enabled;

(...skipping 86 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
198 // 2. Parameter getters are never called concurrently with the corresponding	190 // 2. Parameter getters are never called concurrently with the corresponding

199 // setter.	191 // setter.

200 //	192 //

201 // APM accepts only linear PCM audio data in chunks of 10 ms. The int16	193 // APM accepts only linear PCM audio data in chunks of 10 ms. The int16

202 // interfaces use interleaved data, while the float interfaces use deinterleaved	194 // interfaces use interleaved data, while the float interfaces use deinterleaved

203 // data.	195 // data.

204 //	196 //

205 // Usage example, omitting error checking:	197 // Usage example, omitting error checking:

206 // AudioProcessing* apm = AudioProcessing::Create(0);	198 // AudioProcessing* apm = AudioProcessing::Create(0);

207 //	199 //

	200 // AudioProcessing::Config config;

	201 // config.level_controller.enabled = true;

	202 // apm->ApplyConfig(config);
	the sun 2016/09/01 13:48:00 So I guess it is best if the example includes hand So I guess it is best if the example includes handling the return code, perhaps with an RTC_CHECK(). peah-webrtc 2016/09/02 08:22:00 I rewrote this again after feedback from the other Show quoted text On 2016/09/01 13:48:00, the sun wrote: > So I guess it is best if the example includes handling the return code, perhaps > with an RTC_CHECK(). I rewrote this again after feedback from the other reviewer. the sun 2016/09/02 09:37:05 I don't see that feedback in this CL, unless you'r Show quoted text On 2016/09/02 08:22:00, peah-webrtc wrote: > On 2016/09/01 13:48:00, the sun wrote: > > So I guess it is best if the example includes handling the return code, > perhaps > > with an RTC_CHECK(). > > I rewrote this again after feedback from the other reviewer. I don't see that feedback in this CL, unless you're talking about Henrik's first set of comments? I'm confused, because you already changed the implementation after that. peah-webrtc 2016/09/07 05:42:59 That feedback is now in the comments from Henrik. Show quoted text On 2016/09/02 09:37:05, the sun wrote: > On 2016/09/02 08:22:00, peah-webrtc wrote: > > On 2016/09/01 13:48:00, the sun wrote: > > > So I guess it is best if the example includes handling the return code, > > perhaps > > > with an RTC_CHECK(). > > > > I rewrote this again after feedback from the other reviewer. > > I don't see that feedback in this CL, unless you're talking about Henrik's first > set of comments? I'm confused, because you already changed the implementation > after that. That feedback is now in the comments from Henrik.
	203 //

208 // apm->high_pass_filter()->Enable(true);	204 // apm->high_pass_filter()->Enable(true);

209 //	205 //

210 // apm->echo_cancellation()->enable_drift_compensation(false);	206 // apm->echo_cancellation()->enable_drift_compensation(false);

211 // apm->echo_cancellation()->Enable(true);	207 // apm->echo_cancellation()->Enable(true);

212 //	208 //

213 // apm->noise_reduction()->set_level(kHighSuppression);	209 // apm->noise_reduction()->set_level(kHighSuppression);

214 // apm->noise_reduction()->Enable(true);	210 // apm->noise_reduction()->Enable(true);

215 //	211 //

216 // apm->gain_control()->set_analog_level_limits(0, 255);	212 // apm->gain_control()->set_analog_level_limits(0, 255);

217 // apm->gain_control()->set_mode(kAdaptiveAnalog);	213 // apm->gain_control()->set_mode(kAdaptiveAnalog);

(...skipping 19 matching lines...) Expand all Loading...
237 //	233 //

238 // // Repeat render and capture processing for the duration of the call...	234 // // Repeat render and capture processing for the duration of the call...

239 // // Start a new call...	235 // // Start a new call...

240 // apm->Initialize();	236 // apm->Initialize();

241 //	237 //

242 // // Close the application...	238 // // Close the application...

243 // delete apm;	239 // delete apm;

244 //	240 //

245 class AudioProcessing {	241 class AudioProcessing {

246 public:	242 public:

	243 // The struct below constitutes the new parameter scheme for the

	244 // audio processing functionality. It is being introduced gradually and

	245 // until it is fully introduced, it is prone to change.

	246 //

	247 // The parameters and behavior of the audio processing module are controlled

	248 // by changing the default values in the AudioProcessing::Config struct. The config is
	the sun 2016/09/01 13:48:00 ApmConfig -> AudioProcessing::Config ApmConfig -> AudioProcessing::Config peah-webrtc 2016/09/02 08:22:00 Done. Show quoted text On 2016/09/01 13:48:00, the sun wrote: > ApmConfig -> AudioProcessing::Config Done.
	249 // applied by passing the struct to the ApplyConfig method in the audio

	250 // processing interface. It is possible to verify the specified parameters by
	the sun 2016/09/01 13:48:01 "audio processing interface" is unnecessary in thi "audio processing interface" is unnecessary in this context. peah-webrtc 2016/09/02 08:22:00 Done. Show quoted text On 2016/09/01 13:48:01, the sun wrote: > "audio processing interface" is unnecessary in this context. Done.
	251 // calling the Validate() method.

	252 struct Config {

	253 struct LevelController {

	254 bool enabled = false;

	255 } level_controller;

	256 bool Validate() const;
	the sun 2016/09/01 13:48:00 Please remove this method and keep it a plain stru Please remove this method and keep it a plain struct. peah-webrtc 2016/09/02 08:22:00 The argument by the other reviewer is that this is Show quoted text On 2016/09/01 13:48:00, the sun wrote: > Please remove this method and keep it a plain struct. The argument by the other reviewer is that this is good to have as it allows the user to test a config without having to crash the application once we set these settings during construction time. the sun 2016/09/02 09:37:05 Well, sure, but we could just as well handle that Show quoted text On 2016/09/02 08:22:00, peah-webrtc wrote: > On 2016/09/01 13:48:00, the sun wrote: > > Please remove this method and keep it a plain struct. > > The argument by the other reviewer is that this is good to have as it allows the > user to test a config without having to crash the application once we set these > settings during construction time. Well, sure, but we could just as well handle that in the ctor then: either we DCHECK the config is valid and revert to some sane settings if it isn't, or we CHECK the config, if we're lazy or just don't believe we can recover. In the unlikely even that the client actually does DCHECK(Validate) on the config before giving it to the ctor, we still need to prepare for an invalid config. And besides, what is the client to do with the information? Additionally, none of the other places where we use construct-from-config semantics, employ a Validate() method. I know, consistency is the hobgoblin of little minds etc. :) But still, it makes sense to make the APIs work the same. There's a better way than all of this, of course, which is to use types for the config parameters that enforce these constraints; ranges etc. hlundin-webrtc 2016/09/06 08:56:03 We use this pattern for the AudioEncoders. 
Each im Show quoted text On 2016/09/02 09:37:05, the sun wrote: > On 2016/09/02 08:22:00, peah-webrtc wrote: > > On 2016/09/01 13:48:00, the sun wrote: > > > Please remove this method and keep it a plain struct. > > > > The argument by the other reviewer is that this is good to have as it allows > the > > user to test a config without having to crash the application once we set > these > > settings during construction time. > > Well, sure, but we could just as well handle that in the ctor then: either we > DCHECK the config is valid and revert to some sane settings if it isn't, or we > CHECK the config, if we're lazy or just don't believe we can recover. In the > unlikely even that the client actually does DCHECK(Validate) on the config > before giving it to the ctor, we still need to prepare for an invalid config. > And besides, what is the client to do with the information? > > Additionally, none of the other places where we use construct-from-config > semantics, employ a Validate() method. I know, consistency is the hobgoblin of > little minds etc. :) But still, it makes sense to make the APIs work the same. > > There's a better way than all of this, of course, which is to use types for the > config parameters that enforce these constraints; ranges etc. We use this pattern for the AudioEncoders. Each implementation of the AudioEncoder interface has a Config which can be passed to the ctor. The ctor will crash on invalid configurations (silently modifying it to a valid config is not tractable), and it is the responsibility of the caller to validate it. What is the caller to do with a failed validation? Well, unlike the ctor, it can probably flag an error, and maybe revert to a default config if that makes sense. peah-webrtc 2016/09/07 05:42:59 Acknowledged. 
Show quoted text On 2016/09/06 08:56:03, hlundin-webrtc wrote: > On 2016/09/02 09:37:05, the sun wrote: > > On 2016/09/02 08:22:00, peah-webrtc wrote: > > > On 2016/09/01 13:48:00, the sun wrote: > > > > Please remove this method and keep it a plain struct. > > > > > > The argument by the other reviewer is that this is good to have as it allows > > the > > > user to test a config without having to crash the application once we set > > these > > > settings during construction time. > > > > Well, sure, but we could just as well handle that in the ctor then: either we > > DCHECK the config is valid and revert to some sane settings if it isn't, or we > > CHECK the config, if we're lazy or just don't believe we can recover. In the > > unlikely even that the client actually does DCHECK(Validate) on the config > > before giving it to the ctor, we still need to prepare for an invalid config. > > And besides, what is the client to do with the information? > > > > Additionally, none of the other places where we use construct-from-config > > semantics, employ a Validate() method. I know, consistency is the hobgoblin of > > little minds etc. :) But still, it makes sense to make the APIs work the same. > > > > There's a better way than all of this, of course, which is to use types for > the > > config parameters that enforce these constraints; ranges etc. > > We use this pattern for the AudioEncoders. Each implementation of the > AudioEncoder interface has a Config which can be passed to the ctor. The ctor > will crash on invalid configurations (silently modifying it to a valid config is > not tractable), and it is the responsibility of the caller to validate it. What > is the caller to do with a failed validation? Well, unlike the ctor, it can > probably flag an error, and maybe revert to a default config if that makes > sense. Acknowledged. 
peah-webrtc 2016/09/07 05:42:59 Types would be a good step on the way, but it is e Show quoted text On 2016/09/02 09:37:05, the sun wrote: > On 2016/09/02 08:22:00, peah-webrtc wrote: > > On 2016/09/01 13:48:00, the sun wrote: > > > Please remove this method and keep it a plain struct. > > > > The argument by the other reviewer is that this is good to have as it allows > the > > user to test a config without having to crash the application once we set > these > > settings during construction time. > > Well, sure, but we could just as well handle that in the ctor then: either we > DCHECK the config is valid and revert to some sane settings if it isn't, or we > CHECK the config, if we're lazy or just don't believe we can recover. In the > unlikely even that the client actually does DCHECK(Validate) on the config > before giving it to the ctor, we still need to prepare for an invalid config. > And besides, what is the client to do with the information? > > Additionally, none of the other places where we use construct-from-config > semantics, employ a Validate() method. I know, consistency is the hobgoblin of > little minds etc. :) But still, it makes sense to make the APIs work the same. > > There's a better way than all of this, of course, which is to use types for the > config parameters that enforce these constraints; ranges etc. Types would be a good step on the way, but it is extremely hard, and becomes quite messy if generalized, to incorporate valid intra-parameter dependencies. An example of that (which can be eliminated in the long run using AEC modes, but for the sake of discussion lets assume it cannot), is the selection of the mobile aec and aec. As it is now, each ProcessStream call checks for whether these are activated at the same time and return an error. It would be much nicer if that error could instead be returned at the time of the creation of APM/at the time when the parameters in APM are set. 
Letting the ctor crash for such a setup is not the right thing to do and in my mind it would then be better to 1) Let the Validate() method flag that an improper setting is used. 2) Let the ctor default to the safest choice of only using the mobile aec in this case.
	257 };

	258

247 // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.	259 // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.

248 enum ChannelLayout {	260 enum ChannelLayout {

249 kMono,	261 kMono,

250 // Left, right.	262 // Left, right.

251 kStereo,	263 kStereo,

252 // Mono, keyboard mic.	264 // Mono, keyboard mic.

253 kMonoAndKeyboard,	265 kMonoAndKeyboard,

254 // Left, right, keyboard mic.	266 // Left, right, keyboard mic.

255 kStereoAndKeyboard	267 kStereoAndKeyboard

256 };	268 };

257	269

258 // Creates an APM instance. Use one instance for every primary audio stream	270 // Creates an APM instance. Use one instance for every primary audio stream

259 // requiring processing. On the client-side, this would typically be one	271 // requiring processing. On the client-side, this would typically be one

260 // instance for the near-end stream, and additional instances for each far-end	272 // instance for the near-end stream, and additional instances for each far-end

261 // stream which requires processing. On the server-side, this would typically	273 // stream which requires processing. On the server-side, this would typically

262 // be one instance for every incoming stream.	274 // be one instance for every incoming stream.

263 static AudioProcessing* Create();	275 static AudioProcessing* Create();

264 // Allows passing in an optional configuration at create-time.	276 // Allows passing in an optional configuration at create-time.

265 static AudioProcessing* Create(const Config& config);	277 static AudioProcessing* Create(const webrtc::Config& config);

266 // Only for testing.	278 // Only for testing.

267 static AudioProcessing* Create(const Config& config,	279 static AudioProcessing* Create(const webrtc::Config& config,

268 NonlinearBeamformer* beamformer);	280 NonlinearBeamformer* beamformer);

269 virtual ~AudioProcessing() {}	281 virtual ~AudioProcessing() {}

270	282

271 // Initializes internal states, while retaining all user settings. This	283 // Initializes internal states, while retaining all user settings. This

272 // should be called before beginning to process a new audio stream. However,	284 // should be called before beginning to process a new audio stream. However,

273 // it is not necessary to call before processing the first stream after	285 // it is not necessary to call before processing the first stream after

274 // creation.	286 // creation.

275 //	287 //

276 // It is also not necessary to call if the audio parameters (sample	288 // It is also not necessary to call if the audio parameters (sample

277 // rate and number of channels) have changed. Passing updated parameters	289 // rate and number of channels) have changed. Passing updated parameters

(...skipping 15 matching lines...) Expand all Loading...
293 // Initialize with unpacked parameters. See Initialize() above for details.	305 // Initialize with unpacked parameters. See Initialize() above for details.

294 //	306 //

295 // TODO(mgraczyk): Remove once clients are updated to use the new interface.	307 // TODO(mgraczyk): Remove once clients are updated to use the new interface.

296 virtual int Initialize(int input_sample_rate_hz,	308 virtual int Initialize(int input_sample_rate_hz,

297 int output_sample_rate_hz,	309 int output_sample_rate_hz,

298 int reverse_sample_rate_hz,	310 int reverse_sample_rate_hz,

299 ChannelLayout input_layout,	311 ChannelLayout input_layout,

300 ChannelLayout output_layout,	312 ChannelLayout output_layout,

301 ChannelLayout reverse_layout) = 0;	313 ChannelLayout reverse_layout) = 0;

302	314

	315 // TODO(peah): This method is a temporary solution used to take control

	316 // over the parameters in the audio processing module and is likely to change.

	317 virtual bool ApplyConfig(const Config& config) = 0;

	318

303 // Pass down additional options which don't have explicit setters. This	319 // Pass down additional options which don't have explicit setters. This

304 // ensures the options are applied immediately.	320 // ensures the options are applied immediately.

305 virtual void SetExtraOptions(const Config& config) = 0;	321 virtual void SetExtraOptions(const webrtc::Config& config) = 0;

306	322

307 // TODO(ajm): Only intended for internal use. Make private and friend the	323 // TODO(ajm): Only intended for internal use. Make private and friend the

308 // necessary classes?	324 // necessary classes?

309 virtual int proc_sample_rate_hz() const = 0;	325 virtual int proc_sample_rate_hz() const = 0;

310 virtual int proc_split_sample_rate_hz() const = 0;	326 virtual int proc_split_sample_rate_hz() const = 0;

311 virtual size_t num_input_channels() const = 0;	327 virtual size_t num_input_channels() const = 0;

312 virtual size_t num_proc_channels() const = 0;	328 virtual size_t num_proc_channels() const = 0;

313 virtual size_t num_output_channels() const = 0;	329 virtual size_t num_output_channels() const = 0;

314 virtual size_t num_reverse_channels() const = 0;	330 virtual size_t num_reverse_channels() const = 0;

315	331

(...skipping 658 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
974 // This does not impact the size of frames passed to \|ProcessStream()\|.	990 // This does not impact the size of frames passed to \|ProcessStream()\|.

975 virtual int set_frame_size_ms(int size) = 0;	991 virtual int set_frame_size_ms(int size) = 0;

976 virtual int frame_size_ms() const = 0;	992 virtual int frame_size_ms() const = 0;

977	993

978 protected:	994 protected:

979 virtual ~VoiceDetection() {}	995 virtual ~VoiceDetection() {}

980 };	996 };

981 } // namespace webrtc	997 } // namespace webrtc

982	998

983 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_	999 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

OLD	NEW