webrtc/modules/audio_processing/audio_processing_impl.cc - Issue 2272423003: Deactivated the intelligibility enhancement functionality by default

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2272423003: Deactivated the intelligibility enhancement functionality by default (Closed)

Patch Set: Changed name of build file flags Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 12 matching lines...) Expand all Loading...
23 #include "webrtc/modules/audio_processing/aec/aec_core.h"	23 #include "webrtc/modules/audio_processing/aec/aec_core.h"

24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"	24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

25 #include "webrtc/modules/audio_processing/audio_buffer.h"	25 #include "webrtc/modules/audio_processing/audio_buffer.h"

26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"	26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

27 #include "webrtc/modules/audio_processing/common.h"	27 #include "webrtc/modules/audio_processing/common.h"

28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"	28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"

29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"	29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"

30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"	30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"

31 #include "webrtc/modules/audio_processing/gain_control_impl.h"	31 #include "webrtc/modules/audio_processing/gain_control_impl.h"

32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"	32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"

	33 #if WEBRTC_INTELLIGIBILITY_ENHANCER

33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	34 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

	35 #endif

34 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"	36 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"

35 #include "webrtc/modules/audio_processing/level_estimator_impl.h"	37 #include "webrtc/modules/audio_processing/level_estimator_impl.h"

36 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"	38 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"

37 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"	39 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"

38 #include "webrtc/modules/audio_processing/voice_detection_impl.h"	40 #include "webrtc/modules/audio_processing/voice_detection_impl.h"

39 #include "webrtc/modules/include/module_common_types.h"	41 #include "webrtc/modules/include/module_common_types.h"

40 #include "webrtc/system_wrappers/include/file_wrapper.h"	42 #include "webrtc/system_wrappers/include/file_wrapper.h"

41 #include "webrtc/system_wrappers/include/logging.h"	43 #include "webrtc/system_wrappers/include/logging.h"

42 #include "webrtc/system_wrappers/include/metrics.h"	44 #include "webrtc/system_wrappers/include/metrics.h"

43	45

44 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	46 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

45 // Files generated at build-time by the protobuf compiler.	47 // Files generated at build-time by the protobuf compiler.

46 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD	48 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD

47 #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"	49 #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"

48 #else	50 #else

49 #include "webrtc/modules/audio_processing/debug.pb.h"	51 #include "webrtc/modules/audio_processing/debug.pb.h"

50 #endif	52 #endif

51 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP	53 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP

52	54

	55 // Check to verify that the define for the intelligibility enhancer is properly

	56 // set.

	57 #if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \

	58 (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \

	59 WEBRTC_INTELLIGIBILITY_ENHANCER != 1)

	60 #error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"

	61 #endif

	62

53 #define RETURN_ON_ERR(expr) \	63 #define RETURN_ON_ERR(expr) \

54 do { \	64 do { \

55 int err = (expr); \	65 int err = (expr); \

56 if (err != kNoError) { \	66 if (err != kNoError) { \

57 return err; \	67 return err; \

58 } \	68 } \

59 } while (0)	69 } while (0)

60	70

61 namespace webrtc {	71 namespace webrtc {

62	72

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
117 std::unique_ptr<GainControlImpl> gain_control;	127 std::unique_ptr<GainControlImpl> gain_control;

118 std::unique_ptr<HighPassFilterImpl> high_pass_filter;	128 std::unique_ptr<HighPassFilterImpl> high_pass_filter;

119 std::unique_ptr<LevelEstimatorImpl> level_estimator;	129 std::unique_ptr<LevelEstimatorImpl> level_estimator;

120 std::unique_ptr<NoiseSuppressionImpl> noise_suppression;	130 std::unique_ptr<NoiseSuppressionImpl> noise_suppression;

121 std::unique_ptr<VoiceDetectionImpl> voice_detection;	131 std::unique_ptr<VoiceDetectionImpl> voice_detection;

122 std::unique_ptr<GainControlForExperimentalAgc>	132 std::unique_ptr<GainControlForExperimentalAgc>

123 gain_control_for_experimental_agc;	133 gain_control_for_experimental_agc;

124	134

125 // Accessed internally from both render and capture.	135 // Accessed internally from both render and capture.

126 std::unique_ptr<TransientSuppressor> transient_suppressor;	136 std::unique_ptr<TransientSuppressor> transient_suppressor;

	137 #if WEBRTC_INTELLIGIBILITY_ENHANCER

127 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;	138 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;

	139 #endif

128 };	140 };

129	141

130 struct AudioProcessingImpl::ApmPrivateSubmodules {	142 struct AudioProcessingImpl::ApmPrivateSubmodules {

131 explicit ApmPrivateSubmodules(NonlinearBeamformer* beamformer)	143 explicit ApmPrivateSubmodules(NonlinearBeamformer* beamformer)

132 : beamformer(beamformer) {}	144 : beamformer(beamformer) {}

133 // Accessed internally from capture or during initialization	145 // Accessed internally from capture or during initialization

134 std::unique_ptr<NonlinearBeamformer> beamformer;	146 std::unique_ptr<NonlinearBeamformer> beamformer;

135 std::unique_ptr<AgcManagerDirect> agc_manager;	147 std::unique_ptr<AgcManagerDirect> agc_manager;

136 std::unique_ptr<LevelController> level_controller;	148 std::unique_ptr<LevelController> level_controller;

137 };	149 };

(...skipping 176 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
314 capture_nonlocked_.fwd_proc_format.num_frames(),	326 capture_nonlocked_.fwd_proc_format.num_frames(),

315 fwd_audio_buffer_channels,	327 fwd_audio_buffer_channels,

316 formats_.api_format.output_stream().num_frames()));	328 formats_.api_format.output_stream().num_frames()));

317	329

318 InitializeGainController();	330 InitializeGainController();

319 InitializeEchoCanceller();	331 InitializeEchoCanceller();

320 InitializeEchoControlMobile();	332 InitializeEchoControlMobile();

321 InitializeExperimentalAgc();	333 InitializeExperimentalAgc();

322 InitializeTransient();	334 InitializeTransient();

323 InitializeBeamformer();	335 InitializeBeamformer();

	336 #if WEBRTC_INTELLIGIBILITY_ENHANCER

324 InitializeIntelligibility();	337 InitializeIntelligibility();

	338 #endif

325 InitializeHighPassFilter();	339 InitializeHighPassFilter();

326 InitializeNoiseSuppression();	340 InitializeNoiseSuppression();

327 InitializeLevelEstimator();	341 InitializeLevelEstimator();

328 InitializeVoiceDetection();	342 InitializeVoiceDetection();

329 InitializeLevelController();	343 InitializeLevelController();

330	344

331 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP	345 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP

332 if (debug_dump_.debug_file->is_open()) {	346 if (debug_dump_.debug_file->is_open()) {

333 int err = WriteInitMessage();	347 int err = WriteInitMessage();

334 if (err != kNoError) {	348 if (err != kNoError) {

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
416 if (capture_nonlocked_.level_controller_enabled !=	430 if (capture_nonlocked_.level_controller_enabled !=

417 config.Get<LevelControl>().enabled) {	431 config.Get<LevelControl>().enabled) {

418 capture_nonlocked_.level_controller_enabled =	432 capture_nonlocked_.level_controller_enabled =

419 config.Get<LevelControl>().enabled;	433 config.Get<LevelControl>().enabled;

420 LOG(LS_INFO) << "Level controller activated: "	434 LOG(LS_INFO) << "Level controller activated: "

421 << config.Get<LevelControl>().enabled;	435 << config.Get<LevelControl>().enabled;

422	436

423 InitializeLevelController();	437 InitializeLevelController();

424 }	438 }

425	439

	440 #if WEBRTC_INTELLIGIBILITY_ENHANCER

426 if(capture_nonlocked_.intelligibility_enabled !=	441 if(capture_nonlocked_.intelligibility_enabled !=

427 config.Get<Intelligibility>().enabled) {	442 config.Get<Intelligibility>().enabled) {

428 capture_nonlocked_.intelligibility_enabled =	443 capture_nonlocked_.intelligibility_enabled =

429 config.Get<Intelligibility>().enabled;	444 config.Get<Intelligibility>().enabled;

430 InitializeIntelligibility();	445 InitializeIntelligibility();

431 }	446 }

	447 #endif

432	448

433 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD	449 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD

434 if (capture_nonlocked_.beamformer_enabled !=	450 if (capture_nonlocked_.beamformer_enabled !=

435 config.Get<Beamforming>().enabled) {	451 config.Get<Beamforming>().enabled) {

436 capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;	452 capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;

437 if (config.Get<Beamforming>().array_geometry.size() > 1) {	453 if (config.Get<Beamforming>().array_geometry.size() > 1) {

438 capture_.array_geometry = config.Get<Beamforming>().array_geometry;	454 capture_.array_geometry = config.Get<Beamforming>().array_geometry;

439 }	455 }

440 capture_.target_direction = config.Get<Beamforming>().target_direction;	456 capture_.target_direction = config.Get<Beamforming>().target_direction;

441 InitializeBeamformer();	457 InitializeBeamformer();

(...skipping 276 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
718 }	734 }

719	735

720 RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(	736 RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(

721 ca, stream_delay_ms()));	737 ca, stream_delay_ms()));

722	738

723 if (public_submodules_->echo_control_mobile->is_enabled() &&	739 if (public_submodules_->echo_control_mobile->is_enabled() &&

724 public_submodules_->noise_suppression->is_enabled()) {	740 public_submodules_->noise_suppression->is_enabled()) {

725 ca->CopyLowPassToReference();	741 ca->CopyLowPassToReference();

726 }	742 }

727 public_submodules_->noise_suppression->ProcessCaptureAudio(ca);	743 public_submodules_->noise_suppression->ProcessCaptureAudio(ca);

	744 #if WEBRTC_INTELLIGIBILITY_ENHANCER

728 if (capture_nonlocked_.intelligibility_enabled) {	745 if (capture_nonlocked_.intelligibility_enabled) {

729 RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());	746 RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());

730 int gain_db = public_submodules_->gain_control->is_enabled() ?	747 int gain_db = public_submodules_->gain_control->is_enabled() ?

731 public_submodules_->gain_control->compression_gain_db() :	748 public_submodules_->gain_control->compression_gain_db() :

732 0;	749 0;

733 float gain = std::pow(10.f, gain_db / 20.f);	750 float gain = std::pow(10.f, gain_db / 20.f);

734 gain *= capture_nonlocked_.level_controller_enabled ?	751 gain *= capture_nonlocked_.level_controller_enabled ?

735 private_submodules_->level_controller->GetLastGain() :	752 private_submodules_->level_controller->GetLastGain() :

736 1.f;	753 1.f;

737 public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(	754 public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(

738 public_submodules_->noise_suppression->NoiseEstimate(), gain);	755 public_submodules_->noise_suppression->NoiseEstimate(), gain);

739 }	756 }

	757 #endif

740	758

741 // Ensure that the stream delay was set before the call to the	759 // Ensure that the stream delay was set before the call to the

742 // AECM ProcessCaptureAudio function.	760 // AECM ProcessCaptureAudio function.

743 if (public_submodules_->echo_control_mobile->is_enabled() &&	761 if (public_submodules_->echo_control_mobile->is_enabled() &&

744 !was_stream_delay_set()) {	762 !was_stream_delay_set()) {

745 return AudioProcessing::kStreamParameterNotSetError;	763 return AudioProcessing::kStreamParameterNotSetError;

746 }	764 }

747	765

748 RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(	766 RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(

749 ca, stream_delay_ms()));	767 ca, stream_delay_ms()));

(...skipping 179 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
929 }	947 }

930 return kNoError;	948 return kNoError;

931 }	949 }

932	950

933 int AudioProcessingImpl::ProcessReverseStreamLocked() {	951 int AudioProcessingImpl::ProcessReverseStreamLocked() {

934 AudioBuffer* ra = render_.render_audio.get(); // For brevity.	952 AudioBuffer* ra = render_.render_audio.get(); // For brevity.

935 if (rev_analysis_needed()) {	953 if (rev_analysis_needed()) {

936 ra->SplitIntoFrequencyBands();	954 ra->SplitIntoFrequencyBands();

937 }	955 }

938	956

	957 #if WEBRTC_INTELLIGIBILITY_ENHANCER

939 if (capture_nonlocked_.intelligibility_enabled) {	958 if (capture_nonlocked_.intelligibility_enabled) {

940 public_submodules_->intelligibility_enhancer->ProcessRenderAudio(	959 public_submodules_->intelligibility_enhancer->ProcessRenderAudio(

941 ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,	960 ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,

942 ra->num_channels());	961 ra->num_channels());

943 }	962 }

	963 #endif

944	964

945 RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra));	965 RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra));

946 RETURN_ON_ERR(	966 RETURN_ON_ERR(

947 public_submodules_->echo_control_mobile->ProcessRenderAudio(ra));	967 public_submodules_->echo_control_mobile->ProcessRenderAudio(ra));

948 if (!constants_.use_experimental_agc) {	968 if (!constants_.use_experimental_agc) {

949 RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));	969 RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));

950 }	970 }

951	971

952 if (rev_synthesis_needed()) {	972 if (rev_synthesis_needed()) {

953 ra->MergeFrequencyBands();	973 ra->MergeFrequencyBands();

(...skipping 211 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1165 } else if (is_multi_band(	1185 } else if (is_multi_band(

1166 capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {	1186 capture_nonlocked_.fwd_proc_format.sample_rate_hz())) {

1167 // Something besides public_submodules_->level_estimator is enabled, and we	1187 // Something besides public_submodules_->level_estimator is enabled, and we

1168 // have super-wb.	1188 // have super-wb.

1169 return true;	1189 return true;

1170 }	1190 }

1171 return false;	1191 return false;

1172 }	1192 }

1173	1193

1174 bool AudioProcessingImpl::is_rev_processed() const {	1194 bool AudioProcessingImpl::is_rev_processed() const {

	1195 #if WEBRTC_INTELLIGIBILITY_ENHANCER

1175 return capture_nonlocked_.intelligibility_enabled;	1196 return capture_nonlocked_.intelligibility_enabled;

	1197 #else

	1198 return false;

	1199 #endif

1176 }	1200 }

1177	1201

1178 bool AudioProcessingImpl::rev_synthesis_needed() const {	1202 bool AudioProcessingImpl::rev_synthesis_needed() const {

1179 return (is_rev_processed() &&	1203 return (is_rev_processed() &&

1180 is_multi_band(formats_.rev_proc_format.sample_rate_hz()));	1204 is_multi_band(formats_.rev_proc_format.sample_rate_hz()));

1181 }	1205 }

1182	1206

1183 bool AudioProcessingImpl::rev_analysis_needed() const {	1207 bool AudioProcessingImpl::rev_analysis_needed() const {

1184 return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&	1208 return is_multi_band(formats_.rev_proc_format.sample_rate_hz()) &&

1185 (is_rev_processed() ||	1209 (is_rev_processed() ||

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1230 if (!private_submodules_->beamformer) {	1254 if (!private_submodules_->beamformer) {

1231 private_submodules_->beamformer.reset(new NonlinearBeamformer(	1255 private_submodules_->beamformer.reset(new NonlinearBeamformer(

1232 capture_.array_geometry, 1u, capture_.target_direction));	1256 capture_.array_geometry, 1u, capture_.target_direction));

1233 }	1257 }

1234 private_submodules_->beamformer->Initialize(kChunkSizeMs,	1258 private_submodules_->beamformer->Initialize(kChunkSizeMs,

1235 capture_nonlocked_.split_rate);	1259 capture_nonlocked_.split_rate);

1236 }	1260 }

1237 }	1261 }

1238	1262

1239 void AudioProcessingImpl::InitializeIntelligibility() {	1263 void AudioProcessingImpl::InitializeIntelligibility() {

	1264 #if WEBRTC_INTELLIGIBILITY_ENHANCER

1240 if (capture_nonlocked_.intelligibility_enabled) {	1265 if (capture_nonlocked_.intelligibility_enabled) {

1241 public_submodules_->intelligibility_enhancer.reset(	1266 public_submodules_->intelligibility_enhancer.reset(

1242 new IntelligibilityEnhancer(capture_nonlocked_.split_rate,	1267 new IntelligibilityEnhancer(capture_nonlocked_.split_rate,

1243 render_.render_audio->num_channels(),	1268 render_.render_audio->num_channels(),

1244 NoiseSuppressionImpl::num_noise_bins()));	1269 NoiseSuppressionImpl::num_noise_bins()));

1245 }	1270 }

	1271 #endif

1246 }	1272 }

1247	1273

1248 void AudioProcessingImpl::InitializeHighPassFilter() {	1274 void AudioProcessingImpl::InitializeHighPassFilter() {

1249 public_submodules_->high_pass_filter->Initialize(num_proc_channels(),	1275 public_submodules_->high_pass_filter->Initialize(num_proc_channels(),

1250 proc_sample_rate_hz());	1276 proc_sample_rate_hz());

1251 }	1277 }

1252	1278

1253 void AudioProcessingImpl::InitializeNoiseSuppression() {	1279 void AudioProcessingImpl::InitializeNoiseSuppression() {

1254 public_submodules_->noise_suppression->Initialize(num_proc_channels(),	1280 public_submodules_->noise_suppression->Initialize(num_proc_channels(),

1255 proc_sample_rate_hz());	1281 proc_sample_rate_hz());

(...skipping 236 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1492 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);	1518 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

1493	1519

1494 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),	1520 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),

1495 &debug_dump_.num_bytes_left_for_log_,	1521 &debug_dump_.num_bytes_left_for_log_,

1496 &crit_debug_, &debug_dump_.capture));	1522 &crit_debug_, &debug_dump_.capture));

1497 return kNoError;	1523 return kNoError;

1498 }	1524 }

1499 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP	1525 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP

1500	1526

1501 } // namespace webrtc	1527 } // namespace webrtc

OLD	NEW