webrtc/modules/audio_processing/audio_processing_impl.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Changes in response to reviewer comments Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 13 matching lines...) Expand all Loading...
24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"	24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

25 #include "webrtc/modules/audio_processing/audio_buffer.h"	25 #include "webrtc/modules/audio_processing/audio_buffer.h"

26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"	26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

27 #include "webrtc/modules/audio_processing/common.h"	27 #include "webrtc/modules/audio_processing/common.h"

28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"	28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"

29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"	29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"

30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"	30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"

31 #include "webrtc/modules/audio_processing/gain_control_impl.h"	31 #include "webrtc/modules/audio_processing/gain_control_impl.h"

32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"	32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"

33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

	34 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

34 #include "webrtc/modules/audio_processing/level_estimator_impl.h"	35 #include "webrtc/modules/audio_processing/level_estimator_impl.h"

35 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"	36 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"

36 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"	37 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"

37 #include "webrtc/modules/audio_processing/voice_detection_impl.h"	38 #include "webrtc/modules/audio_processing/voice_detection_impl.h"

38 #include "webrtc/modules/include/module_common_types.h"	39 #include "webrtc/modules/include/module_common_types.h"

39 #include "webrtc/system_wrappers/include/file_wrapper.h"	40 #include "webrtc/system_wrappers/include/file_wrapper.h"

40 #include "webrtc/system_wrappers/include/logging.h"	41 #include "webrtc/system_wrappers/include/logging.h"

41 #include "webrtc/system_wrappers/include/metrics.h"	42 #include "webrtc/system_wrappers/include/metrics.h"

42	43

43 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	44 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
125 std::unique_ptr<TransientSuppressor> transient_suppressor;	126 std::unique_ptr<TransientSuppressor> transient_suppressor;

126 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;	127 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;

127 };	128 };

128	129

129 struct AudioProcessingImpl::ApmPrivateSubmodules {	130 struct AudioProcessingImpl::ApmPrivateSubmodules {

130 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)	131 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)

131 : beamformer(beamformer) {}	132 : beamformer(beamformer) {}

132 // Accessed internally from capture or during initialization	133 // Accessed internally from capture or during initialization

133 std::unique_ptr<Beamformer<float>> beamformer;	134 std::unique_ptr<Beamformer<float>> beamformer;

134 std::unique_ptr<AgcManagerDirect> agc_manager;	135 std::unique_ptr<AgcManagerDirect> agc_manager;

	136 std::unique_ptr<LevelController> level_controller;

135 };	137 };

136	138

137 AudioProcessing* AudioProcessing::Create() {	139 AudioProcessing* AudioProcessing::Create() {

138 Config config;	140 Config config;

139 return Create(config, nullptr);	141 return Create(config, nullptr);

140 }	142 }

141	143

142 AudioProcessing* AudioProcessing::Create(const Config& config) {	144 AudioProcessing* AudioProcessing::Create(const Config& config) {

143 return Create(config, nullptr);	145 return Create(config, nullptr);

144 }	146 }

(...skipping 23 matching lines...) Expand all Loading...
168 config.Get<ExperimentalAgc>().enabled),	170 config.Get<ExperimentalAgc>().enabled),

169 #endif	171 #endif

170 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)	172 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)

171 capture_(false,	173 capture_(false,

172 #else	174 #else

173 capture_(config.Get<ExperimentalNs>().enabled,	175 capture_(config.Get<ExperimentalNs>().enabled,

174 #endif	176 #endif

175 config.Get<Beamforming>().array_geometry,	177 config.Get<Beamforming>().array_geometry,

176 config.Get<Beamforming>().target_direction),	178 config.Get<Beamforming>().target_direction),

177 capture_nonlocked_(config.Get<Beamforming>().enabled,	179 capture_nonlocked_(config.Get<Beamforming>().enabled,

178 config.Get<Intelligibility>().enabled)	180 config.Get<Intelligibility>().enabled,

179 {	181 config.Get<LevelControl>().enabled) {

180 {	182 {

181 rtc::CritScope cs_render(&crit_render_);	183 rtc::CritScope cs_render(&crit_render_);

182 rtc::CritScope cs_capture(&crit_capture_);	184 rtc::CritScope cs_capture(&crit_capture_);

183	185

184 public_submodules_->echo_cancellation.reset(	186 public_submodules_->echo_cancellation.reset(

185 new EchoCancellationImpl(&crit_render_, &crit_capture_));	187 new EchoCancellationImpl(&crit_render_, &crit_capture_));

186 public_submodules_->echo_control_mobile.reset(	188 public_submodules_->echo_control_mobile.reset(

187 new EchoControlMobileImpl(&crit_render_, &crit_capture_));	189 new EchoControlMobileImpl(&crit_render_, &crit_capture_));

188 public_submodules_->gain_control.reset(	190 public_submodules_->gain_control.reset(

189 new GainControlImpl(&crit_capture_, &crit_capture_));	191 new GainControlImpl(&crit_capture_, &crit_capture_));

190 public_submodules_->high_pass_filter.reset(	192 public_submodules_->high_pass_filter.reset(

191 new HighPassFilterImpl(&crit_capture_));	193 new HighPassFilterImpl(&crit_capture_));

192 public_submodules_->level_estimator.reset(	194 public_submodules_->level_estimator.reset(

193 new LevelEstimatorImpl(&crit_capture_));	195 new LevelEstimatorImpl(&crit_capture_));

194 public_submodules_->noise_suppression.reset(	196 public_submodules_->noise_suppression.reset(

195 new NoiseSuppressionImpl(&crit_capture_));	197 new NoiseSuppressionImpl(&crit_capture_));

196 public_submodules_->voice_detection.reset(	198 public_submodules_->voice_detection.reset(

197 new VoiceDetectionImpl(&crit_capture_));	199 new VoiceDetectionImpl(&crit_capture_));

198 public_submodules_->gain_control_for_experimental_agc.reset(	200 public_submodules_->gain_control_for_experimental_agc.reset(

199 new GainControlForExperimentalAgc(	201 new GainControlForExperimentalAgc(

200 public_submodules_->gain_control.get(), &crit_capture_));	202 public_submodules_->gain_control.get(), &crit_capture_));

	203

	204 private_submodules_->level_controller.reset(new LevelController());

201 }	205 }

202	206

203 SetExtraOptions(config);	207 SetExtraOptions(config);

204 }	208 }

205	209

206 AudioProcessingImpl::~AudioProcessingImpl() {	210 AudioProcessingImpl::~AudioProcessingImpl() {

207 // Depends on gain_control_ and	211 // Depends on gain_control_ and

208 // public_submodules_->gain_control_for_experimental_agc.	212 // public_submodules_->gain_control_for_experimental_agc.

209 private_submodules_->agc_manager.reset();	213 private_submodules_->agc_manager.reset();

210 // Depends on gain_control_.	214 // Depends on gain_control_.

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
315 InitializeEchoCanceller();	319 InitializeEchoCanceller();

316 InitializeEchoControlMobile();	320 InitializeEchoControlMobile();

317 InitializeExperimentalAgc();	321 InitializeExperimentalAgc();

318 InitializeTransient();	322 InitializeTransient();

319 InitializeBeamformer();	323 InitializeBeamformer();

320 InitializeIntelligibility();	324 InitializeIntelligibility();

321 InitializeHighPassFilter();	325 InitializeHighPassFilter();

322 InitializeNoiseSuppression();	326 InitializeNoiseSuppression();

323 InitializeLevelEstimator();	327 InitializeLevelEstimator();

324 InitializeVoiceDetection();	328 InitializeVoiceDetection();

	329 InitializeLevelController();

325	330

326 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	331 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

327 if (debug_dump_.debug_file->is_open()) {	332 if (debug_dump_.debug_file->is_open()) {

328 int err = WriteInitMessage();	333 int err = WriteInitMessage();

329 if (err != kNoError) {	334 if (err != kNoError) {

330 return err;	335 return err;

331 }	336 }

332 }	337 }

333 #endif	338 #endif

334	339

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
401	406

402 public_submodules_->echo_cancellation->SetExtraOptions(config);	407 public_submodules_->echo_cancellation->SetExtraOptions(config);

403	408

404 if (capture_.transient_suppressor_enabled !=	409 if (capture_.transient_suppressor_enabled !=

405 config.Get<ExperimentalNs>().enabled) {	410 config.Get<ExperimentalNs>().enabled) {

406 capture_.transient_suppressor_enabled =	411 capture_.transient_suppressor_enabled =

407 config.Get<ExperimentalNs>().enabled;	412 config.Get<ExperimentalNs>().enabled;

408 InitializeTransient();	413 InitializeTransient();

409 }	414 }

410	415

	416 if (capture_nonlocked_.level_controller_enabled !=

	417 config.Get<LevelControl>().enabled) {

	418 capture_nonlocked_.level_controller_enabled =

	419 config.Get<LevelControl>().enabled;

	420 InitializeLevelController();

	421 }

	422

411 if(capture_nonlocked_.intelligibility_enabled !=	423 if(capture_nonlocked_.intelligibility_enabled !=

412 config.Get<Intelligibility>().enabled) {	424 config.Get<Intelligibility>().enabled) {

413 capture_nonlocked_.intelligibility_enabled =	425 capture_nonlocked_.intelligibility_enabled =

414 config.Get<Intelligibility>().enabled;	426 config.Get<Intelligibility>().enabled;

415 InitializeIntelligibility();	427 InitializeIntelligibility();

416 }	428 }

417	429

418 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD	430 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD

419 if (capture_nonlocked_.beamformer_enabled !=	431 if (capture_nonlocked_.beamformer_enabled !=

420 config.Get<Beamforming>().enabled) {	432 config.Get<Beamforming>().enabled) {

(...skipping 331 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
752 ? private_submodules_->agc_manager->voice_probability()	764 ? private_submodules_->agc_manager->voice_probability()

753 : 1.f;	765 : 1.f;

754	766

755 public_submodules_->transient_suppressor->Suppress(	767 public_submodules_->transient_suppressor->Suppress(

756 ca->channels_f()[0], ca->num_frames(), ca->num_channels(),	768 ca->channels_f()[0], ca->num_frames(), ca->num_channels(),

757 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),	769 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),

758 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,	770 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,

759 capture_.key_pressed);	771 capture_.key_pressed);

760 }	772 }

761	773

	774 if (capture_nonlocked_.level_controller_enabled) {

	775 private_submodules_->level_controller->Process(ca);
	aluebs-webrtc 2016/06/28 22:45:02
	I am not sure how this component works (it is not in the CL description or documented in the header file), but if it applies an additional gain to the signal, we probably want to compensate for that in the gain_db parameter when calling intelligibility_enhancer->SetCaptureNoiseEstimate() in line 728.

	peah-webrtc 2016/06/29 06:22:09 (in reply to aluebs-webrtc, 2016/06/28 22:45:02)
	The component indeed applies an additional gain. I'll add an API call for obtaining the latest applied gain, but I'll let you use that gain in the intelligibility enhancer in another CL since
	1) I don't really understand how that gain is to be used in the intelligibility enhancer.
	2) It is nice to make this CL only concern the level control and not also affect the intelligibility enhancer.
	3) The intelligibility enhancer is currently not active by default so the addition of the level control will not cause any downstream breakages.
	Let's discuss it further offline if you want.

	aluebs-webrtc 2016/06/29 16:40:28 (in reply to peah-webrtc, 2016/06/29 06:22:09)
	2) This CL concerns the IE, because it applies a gain without compensating for it.
	3) It can be manually enabled and some of us are using it already.
	But I can do a separate CL if you want.

	peah-webrtc 2016/06/29 20:39:59 (in reply to aluebs-webrtc, 2016/06/29 16:40:28)
	I definitely see that it affects the IE, but I think it is better if you do that change as you know how that is designed. Furthermore, I'd expect that that change needs to be verified carefully, as the level controller may change the power of the signal by as much as 20 dB. An alternative is to ensure that the IE and the level control are never active at the same time.
	776 }

	777

762 // The level estimator operates on the recombined data.	778 // The level estimator operates on the recombined data.

763 public_submodules_->level_estimator->ProcessStream(ca);	779 public_submodules_->level_estimator->ProcessStream(ca);

764	780

765 capture_.was_stream_delay_set = false;	781 capture_.was_stream_delay_set = false;

766 return kNoError;	782 return kNoError;

767 }	783 }

768	784

769 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,	785 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,

770 size_t samples_per_channel,	786 size_t samples_per_channel,

771 int rev_sample_rate_hz,	787 int rev_sample_rate_hz,

(...skipping 339 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1111 }	1127 }

1112	1128

1113 // The capture data is otherwise unchanged.	1129 // The capture data is otherwise unchanged.

1114 return false;	1130 return false;

1115 }	1131 }

1116	1132

1117 bool AudioProcessingImpl::output_copy_needed() const {	1133 bool AudioProcessingImpl::output_copy_needed() const {

1118 // Check if we've upmixed or downmixed the audio.	1134 // Check if we've upmixed or downmixed the audio.

1119 return ((formats_.api_format.output_stream().num_channels() !=	1135 return ((formats_.api_format.output_stream().num_channels() !=

1120 formats_.api_format.input_stream().num_channels()) ||	1136 formats_.api_format.input_stream().num_channels()) ||

1121 is_fwd_processed() || capture_.transient_suppressor_enabled);	1137 is_fwd_processed() || capture_.transient_suppressor_enabled ||

	1138 capture_nonlocked_.level_controller_enabled);

1122 }	1139 }

1123	1140

1124 bool AudioProcessingImpl::fwd_synthesis_needed() const {	1141 bool AudioProcessingImpl::fwd_synthesis_needed() const {

1125 return (is_fwd_processed() &&	1142 return (is_fwd_processed() &&

1126 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));	1143 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));

1127 }	1144 }

1128	1145

1129 bool AudioProcessingImpl::fwd_analysis_needed() const {	1146 bool AudioProcessingImpl::fwd_analysis_needed() const {

1130 if (!is_fwd_processed() &&	1147 if (!is_fwd_processed() &&

1131 !public_submodules_->voice_detection->is_enabled() &&	1148 !public_submodules_->voice_detection->is_enabled() &&

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1240 public_submodules_->echo_control_mobile->Initialize(	1257 public_submodules_->echo_control_mobile->Initialize(

1241 proc_split_sample_rate_hz(),	1258 proc_split_sample_rate_hz(),

1242 num_reverse_channels(),	1259 num_reverse_channels(),

1243 num_output_channels());	1260 num_output_channels());

1244 }	1261 }

1245	1262

1246 void AudioProcessingImpl::InitializeLevelEstimator() {	1263 void AudioProcessingImpl::InitializeLevelEstimator() {

1247 public_submodules_->level_estimator->Initialize();	1264 public_submodules_->level_estimator->Initialize();

1248 }	1265 }

1249	1266

	1267 void AudioProcessingImpl::InitializeLevelController() {

	1268 private_submodules_->level_controller->Initialize(proc_sample_rate_hz());

	1269 }

	1270

1250 void AudioProcessingImpl::InitializeVoiceDetection() {	1271 void AudioProcessingImpl::InitializeVoiceDetection() {

1251 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());	1272 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());

1252 }	1273 }

1253	1274

1254 void AudioProcessingImpl::MaybeUpdateHistograms() {	1275 void AudioProcessingImpl::MaybeUpdateHistograms() {

1255 static const int kMinDiffDelayMs = 60;	1276 static const int kMinDiffDelayMs = 60;

1256	1277

1257 if (echo_cancellation()->is_enabled()) {	1278 if (echo_cancellation()->is_enabled()) {

1258 // Activate delay_jumps_ counters if we know echo_cancellation is runnning.	1279 // Activate delay_jumps_ counters if we know echo_cancellation is runnning.

1259 // If a stream has echo we know that the echo_cancellation is in process.	1280 // If a stream has echo we know that the echo_cancellation is in process.

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1455 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);	1476 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

1456	1477

1457 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),	1478 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),

1458 &debug_dump_.num_bytes_left_for_log_,	1479 &debug_dump_.num_bytes_left_for_log_,

1459 &crit_debug_, &debug_dump_.capture));	1480 &crit_debug_, &debug_dump_.capture));

1460 return kNoError;	1481 return kNoError;

1461 }	1482 }

1462 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP	1483 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP

1463	1484

1464 } // namespace webrtc	1485 } // namespace webrtc

OLD	NEW