webrtc/modules/audio_processing/audio_processing_impl.cc - Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Corrected the initial behavior for the peak level estimate, and ensured a nonzero minimum peak leve… Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.h ('k') | webrtc/modules/audio_processing/audio_processing_tests.gypi » ('j') | webrtc/modules/audio_processing/level_controller/biquad_filter.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/audio_processing_impl.h"	11 #include "webrtc/modules/audio_processing/audio_processing_impl.h"

12	12

13 #include <assert.h>	13 #include <assert.h>

14 #include <algorithm>	14 #include <algorithm>

15	15

16 #include "webrtc/base/checks.h"	16 #include "webrtc/base/checks.h"

17 #include "webrtc/base/platform_file.h"	17 #include "webrtc/base/platform_file.h"

18 #include "webrtc/base/trace_event.h"	18 #include "webrtc/base/trace_event.h"

19 #include "webrtc/common_audio/audio_converter.h"	19 #include "webrtc/common_audio/audio_converter.h"

20 #include "webrtc/common_audio/channel_buffer.h"	20 #include "webrtc/common_audio/channel_buffer.h"

21 #include "webrtc/common_audio/include/audio_util.h"	21 #include "webrtc/common_audio/include/audio_util.h"

22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"	22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

23 #include "webrtc/modules/audio_processing/aec/aec_core.h"	23 #include "webrtc/modules/audio_processing/aec/aec_core.h"

24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"	24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

	25 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

25 #include "webrtc/modules/audio_processing/audio_buffer.h"	26 #include "webrtc/modules/audio_processing/audio_buffer.h"

26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"	27 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

27 #include "webrtc/modules/audio_processing/common.h"	28 #include "webrtc/modules/audio_processing/common.h"

28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"	29 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"

29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"	30 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"

30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"	31 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"

31 #include "webrtc/modules/audio_processing/gain_control_impl.h"	32 #include "webrtc/modules/audio_processing/gain_control_impl.h"

32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"	33 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"

33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	34 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

	35 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

34 #include "webrtc/modules/audio_processing/level_estimator_impl.h"	36 #include "webrtc/modules/audio_processing/level_estimator_impl.h"

35 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"	37 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"

36 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"	38 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"

37 #include "webrtc/modules/audio_processing/voice_detection_impl.h"	39 #include "webrtc/modules/audio_processing/voice_detection_impl.h"

38 #include "webrtc/modules/include/module_common_types.h"	40 #include "webrtc/modules/include/module_common_types.h"

39 #include "webrtc/system_wrappers/include/file_wrapper.h"	41 #include "webrtc/system_wrappers/include/file_wrapper.h"

40 #include "webrtc/system_wrappers/include/logging.h"	42 #include "webrtc/system_wrappers/include/logging.h"

41 #include "webrtc/system_wrappers/include/metrics.h"	43 #include "webrtc/system_wrappers/include/metrics.h"

42	44

43 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	45 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
125 std::unique_ptr<TransientSuppressor> transient_suppressor;	127 std::unique_ptr<TransientSuppressor> transient_suppressor;

126 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;	128 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;

127 };	129 };

128	130

129 struct AudioProcessingImpl::ApmPrivateSubmodules {	131 struct AudioProcessingImpl::ApmPrivateSubmodules {

130 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)	132 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)

131 : beamformer(beamformer) {}	133 : beamformer(beamformer) {}

132 // Accessed internally from capture or during initialization	134 // Accessed internally from capture or during initialization

133 std::unique_ptr<Beamformer<float>> beamformer;	135 std::unique_ptr<Beamformer<float>> beamformer;

134 std::unique_ptr<AgcManagerDirect> agc_manager;	136 std::unique_ptr<AgcManagerDirect> agc_manager;

	137 std::unique_ptr<LevelController> level_controller;

135 };	138 };

136	139

137 AudioProcessing* AudioProcessing::Create() {	140 AudioProcessing* AudioProcessing::Create() {

138 Config config;	141 Config config;

139 return Create(config, nullptr);	142 return Create(config, nullptr);

140 }	143 }

141	144

142 AudioProcessing* AudioProcessing::Create(const Config& config) {	145 AudioProcessing* AudioProcessing::Create(const Config& config) {

143 return Create(config, nullptr);	146 return Create(config, nullptr);

144 }	147 }

(...skipping 23 matching lines...) Expand all Loading...
168 config.Get<ExperimentalAgc>().enabled),	171 config.Get<ExperimentalAgc>().enabled),

169 #endif	172 #endif

170 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)	173 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)

171 capture_(false,	174 capture_(false,

172 #else	175 #else

173 capture_(config.Get<ExperimentalNs>().enabled,	176 capture_(config.Get<ExperimentalNs>().enabled,

174 #endif	177 #endif

175 config.Get<Beamforming>().array_geometry,	178 config.Get<Beamforming>().array_geometry,

176 config.Get<Beamforming>().target_direction),	179 config.Get<Beamforming>().target_direction),

177 capture_nonlocked_(config.Get<Beamforming>().enabled,	180 capture_nonlocked_(config.Get<Beamforming>().enabled,

178 config.Get<Intelligibility>().enabled)	181 config.Get<Intelligibility>().enabled,

179 {	182 config.Get<LevelControl>().enabled) {

180 {	183 {

181 rtc::CritScope cs_render(&crit_render_);	184 rtc::CritScope cs_render(&crit_render_);

182 rtc::CritScope cs_capture(&crit_capture_);	185 rtc::CritScope cs_capture(&crit_capture_);

183	186

184 public_submodules_->echo_cancellation.reset(	187 public_submodules_->echo_cancellation.reset(

185 new EchoCancellationImpl(&crit_render_, &crit_capture_));	188 new EchoCancellationImpl(&crit_render_, &crit_capture_));

186 public_submodules_->echo_control_mobile.reset(	189 public_submodules_->echo_control_mobile.reset(

187 new EchoControlMobileImpl(&crit_render_, &crit_capture_));	190 new EchoControlMobileImpl(&crit_render_, &crit_capture_));

188 public_submodules_->gain_control.reset(	191 public_submodules_->gain_control.reset(

189 new GainControlImpl(&crit_capture_, &crit_capture_));	192 new GainControlImpl(&crit_capture_, &crit_capture_));

190 public_submodules_->high_pass_filter.reset(	193 public_submodules_->high_pass_filter.reset(

191 new HighPassFilterImpl(&crit_capture_));	194 new HighPassFilterImpl(&crit_capture_));

192 public_submodules_->level_estimator.reset(	195 public_submodules_->level_estimator.reset(

193 new LevelEstimatorImpl(&crit_capture_));	196 new LevelEstimatorImpl(&crit_capture_));

194 public_submodules_->noise_suppression.reset(	197 public_submodules_->noise_suppression.reset(

195 new NoiseSuppressionImpl(&crit_capture_));	198 new NoiseSuppressionImpl(&crit_capture_));

196 public_submodules_->voice_detection.reset(	199 public_submodules_->voice_detection.reset(

197 new VoiceDetectionImpl(&crit_capture_));	200 new VoiceDetectionImpl(&crit_capture_));

198 public_submodules_->gain_control_for_experimental_agc.reset(	201 public_submodules_->gain_control_for_experimental_agc.reset(

199 new GainControlForExperimentalAgc(	202 new GainControlForExperimentalAgc(

200 public_submodules_->gain_control.get(), &crit_capture_));	203 public_submodules_->gain_control.get(), &crit_capture_));

	204

	205 private_submodules_->level_controller.reset(new LevelController());

201 }	206 }

202	207

203 SetExtraOptions(config);	208 SetExtraOptions(config);

204 }	209 }

205	210

206 AudioProcessingImpl::~AudioProcessingImpl() {	211 AudioProcessingImpl::~AudioProcessingImpl() {

207 // Depends on gain_control_ and	212 // Depends on gain_control_ and

208 // public_submodules_->gain_control_for_experimental_agc.	213 // public_submodules_->gain_control_for_experimental_agc.

209 private_submodules_->agc_manager.reset();	214 private_submodules_->agc_manager.reset();

210 // Depends on gain_control_.	215 // Depends on gain_control_.

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
315 InitializeEchoCanceller();	320 InitializeEchoCanceller();

316 InitializeEchoControlMobile();	321 InitializeEchoControlMobile();

317 InitializeExperimentalAgc();	322 InitializeExperimentalAgc();

318 InitializeTransient();	323 InitializeTransient();

319 InitializeBeamformer();	324 InitializeBeamformer();

320 InitializeIntelligibility();	325 InitializeIntelligibility();

321 InitializeHighPassFilter();	326 InitializeHighPassFilter();

322 InitializeNoiseSuppression();	327 InitializeNoiseSuppression();

323 InitializeLevelEstimator();	328 InitializeLevelEstimator();

324 InitializeVoiceDetection();	329 InitializeVoiceDetection();

	330 InitializeLevelController();

325	331

326 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	332 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

327 if (debug_dump_.debug_file->is_open()) {	333 if (debug_dump_.debug_file->is_open()) {

328 int err = WriteInitMessage();	334 int err = WriteInitMessage();

329 if (err != kNoError) {	335 if (err != kNoError) {

330 return err;	336 return err;

331 }	337 }

332 }	338 }

333 #endif	339 #endif

334	340

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
401	407

402 public_submodules_->echo_cancellation->SetExtraOptions(config);	408 public_submodules_->echo_cancellation->SetExtraOptions(config);

403	409

404 if (capture_.transient_suppressor_enabled !=	410 if (capture_.transient_suppressor_enabled !=

405 config.Get<ExperimentalNs>().enabled) {	411 config.Get<ExperimentalNs>().enabled) {

406 capture_.transient_suppressor_enabled =	412 capture_.transient_suppressor_enabled =

407 config.Get<ExperimentalNs>().enabled;	413 config.Get<ExperimentalNs>().enabled;

408 InitializeTransient();	414 InitializeTransient();

409 }	415 }

410	416

	417 if (capture_nonlocked_.level_controller_enabled !=

	418 config.Get<LevelControl>().enabled) {

	419 capture_nonlocked_.level_controller_enabled =

	420 config.Get<LevelControl>().enabled;

	421 InitializeLevelController();

	422 }

	423

411 if(capture_nonlocked_.intelligibility_enabled !=	424 if(capture_nonlocked_.intelligibility_enabled !=

412 config.Get<Intelligibility>().enabled) {	425 config.Get<Intelligibility>().enabled) {

413 capture_nonlocked_.intelligibility_enabled =	426 capture_nonlocked_.intelligibility_enabled =

414 config.Get<Intelligibility>().enabled;	427 config.Get<Intelligibility>().enabled;

415 InitializeIntelligibility();	428 InitializeIntelligibility();

416 }	429 }

417	430

418 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD	431 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD

419 if (capture_nonlocked_.beamformer_enabled !=	432 if (capture_nonlocked_.beamformer_enabled !=

420 config.Get<Beamforming>().enabled) {	433 config.Get<Beamforming>().enabled) {

(...skipping 331 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
752 ? private_submodules_->agc_manager->voice_probability()	765 ? private_submodules_->agc_manager->voice_probability()

753 : 1.f;	766 : 1.f;

754	767

755 public_submodules_->transient_suppressor->Suppress(	768 public_submodules_->transient_suppressor->Suppress(

756 ca->channels_f()[0], ca->num_frames(), ca->num_channels(),	769 ca->channels_f()[0], ca->num_frames(), ca->num_channels(),

757 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),	770 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),

758 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,	771 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,

759 capture_.key_pressed);	772 capture_.key_pressed);

760 }	773 }

761	774

	775 if (capture_nonlocked_.level_controller_enabled) {

	776 private_submodules_->level_controller->Process(ca);

	777 }

	778

762 // The level estimator operates on the recombined data.	779 // The level estimator operates on the recombined data.

763 public_submodules_->level_estimator->ProcessStream(ca);	780 public_submodules_->level_estimator->ProcessStream(ca);

764	781

765 capture_.was_stream_delay_set = false;	782 capture_.was_stream_delay_set = false;

766 return kNoError;	783 return kNoError;

767 }	784 }

768	785

769 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,	786 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,

770 size_t samples_per_channel,	787 size_t samples_per_channel,

771 int rev_sample_rate_hz,	788 int rev_sample_rate_hz,

(...skipping 339 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1111 }	1128 }

1112	1129

1113 // The capture data is otherwise unchanged.	1130 // The capture data is otherwise unchanged.

1114 return false;	1131 return false;

1115 }	1132 }

1116	1133

1117 bool AudioProcessingImpl::output_copy_needed() const {	1134 bool AudioProcessingImpl::output_copy_needed() const {

1118 // Check if we've upmixed or downmixed the audio.	1135 // Check if we've upmixed or downmixed the audio.

1119 return ((formats_.api_format.output_stream().num_channels() !=	1136 return ((formats_.api_format.output_stream().num_channels() !=

1120 formats_.api_format.input_stream().num_channels()) ||	1137 formats_.api_format.input_stream().num_channels()) ||

1121 is_fwd_processed() || capture_.transient_suppressor_enabled);	1138 is_fwd_processed() || capture_.transient_suppressor_enabled ||

	1139 capture_nonlocked_.level_controller_enabled);

1122 }	1140 }

1123	1141

1124 bool AudioProcessingImpl::fwd_synthesis_needed() const {	1142 bool AudioProcessingImpl::fwd_synthesis_needed() const {

1125 return (is_fwd_processed() &&	1143 return (is_fwd_processed() &&

1126 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));	1144 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));

1127 }	1145 }

1128	1146

1129 bool AudioProcessingImpl::fwd_analysis_needed() const {	1147 bool AudioProcessingImpl::fwd_analysis_needed() const {

1130 if (!is_fwd_processed() &&	1148 if (!is_fwd_processed() &&

1131 !public_submodules_->voice_detection->is_enabled() &&	1149 !public_submodules_->voice_detection->is_enabled() &&

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1240 public_submodules_->echo_control_mobile->Initialize(	1258 public_submodules_->echo_control_mobile->Initialize(

1241 proc_split_sample_rate_hz(),	1259 proc_split_sample_rate_hz(),

1242 num_reverse_channels(),	1260 num_reverse_channels(),

1243 num_output_channels());	1261 num_output_channels());

1244 }	1262 }

1245	1263

1246 void AudioProcessingImpl::InitializeLevelEstimator() {	1264 void AudioProcessingImpl::InitializeLevelEstimator() {

1247 public_submodules_->level_estimator->Initialize();	1265 public_submodules_->level_estimator->Initialize();

1248 }	1266 }

1249	1267

	1268 void AudioProcessingImpl::InitializeLevelController() {

	1269 private_submodules_->level_controller->Initialize(proc_sample_rate_hz(),

	1270 num_proc_channels());

	1271 }

	1272

1250 void AudioProcessingImpl::InitializeVoiceDetection() {	1273 void AudioProcessingImpl::InitializeVoiceDetection() {

1251 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());	1274 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());

1252 }	1275 }

1253	1276

1254 void AudioProcessingImpl::MaybeUpdateHistograms() {	1277 void AudioProcessingImpl::MaybeUpdateHistograms() {

1255 static const int kMinDiffDelayMs = 60;	1278 static const int kMinDiffDelayMs = 60;

1256	1279

1257 if (echo_cancellation()->is_enabled()) {	1280 if (echo_cancellation()->is_enabled()) {

1258 // Activate delay_jumps_ counters if we know echo_cancellation is running.	1281 // Activate delay_jumps_ counters if we know echo_cancellation is running.

1259 // If a stream has echo we know that the echo_cancellation is in process.	1282 // If a stream has echo we know that the echo_cancellation is in process.

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1455 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);	1478 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

1456	1479

1457 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),	1480 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),

1458 &debug_dump_.num_bytes_left_for_log_,	1481 &debug_dump_.num_bytes_left_for_log_,

1459 &crit_debug_, &debug_dump_.capture));	1482 &crit_debug_, &debug_dump_.capture));

1460 return kNoError;	1483 return kNoError;

1461 }	1484 }

1462 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP	1485 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP

1463	1486

1464 } // namespace webrtc	1487 } // namespace webrtc

OLD	NEW