Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1507)

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 2090583002: New module for the adaptive level controlling functionality in the audio processing module (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Corrected the initial behavior for the peak level estimate, and ensured a nonzero minimum peak leve… Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/audio_processing_impl.h" 11 #include "webrtc/modules/audio_processing/audio_processing_impl.h"
12 12
13 #include <assert.h> 13 #include <assert.h>
14 #include <algorithm> 14 #include <algorithm>
15 15
16 #include "webrtc/base/checks.h" 16 #include "webrtc/base/checks.h"
17 #include "webrtc/base/platform_file.h" 17 #include "webrtc/base/platform_file.h"
18 #include "webrtc/base/trace_event.h" 18 #include "webrtc/base/trace_event.h"
19 #include "webrtc/common_audio/audio_converter.h" 19 #include "webrtc/common_audio/audio_converter.h"
20 #include "webrtc/common_audio/channel_buffer.h" 20 #include "webrtc/common_audio/channel_buffer.h"
21 #include "webrtc/common_audio/include/audio_util.h" 21 #include "webrtc/common_audio/include/audio_util.h"
22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
23 #include "webrtc/modules/audio_processing/aec/aec_core.h" 23 #include "webrtc/modules/audio_processing/aec/aec_core.h"
24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" 24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
25 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"
25 #include "webrtc/modules/audio_processing/audio_buffer.h" 26 #include "webrtc/modules/audio_processing/audio_buffer.h"
26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" 27 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
27 #include "webrtc/modules/audio_processing/common.h" 28 #include "webrtc/modules/audio_processing/common.h"
28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h" 29 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" 30 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
30 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h" 31 #include "webrtc/modules/audio_processing/gain_control_for_experimental_agc.h"
31 #include "webrtc/modules/audio_processing/gain_control_impl.h" 32 #include "webrtc/modules/audio_processing/gain_control_impl.h"
32 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" 33 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" 34 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
35 #include "webrtc/modules/audio_processing/level_controller/level_controller.h"
34 #include "webrtc/modules/audio_processing/level_estimator_impl.h" 36 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
35 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" 37 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
36 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" 38 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
37 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 39 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
38 #include "webrtc/modules/include/module_common_types.h" 40 #include "webrtc/modules/include/module_common_types.h"
39 #include "webrtc/system_wrappers/include/file_wrapper.h" 41 #include "webrtc/system_wrappers/include/file_wrapper.h"
40 #include "webrtc/system_wrappers/include/logging.h" 42 #include "webrtc/system_wrappers/include/logging.h"
41 #include "webrtc/system_wrappers/include/metrics.h" 43 #include "webrtc/system_wrappers/include/metrics.h"
42 44
43 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 45 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 std::unique_ptr<TransientSuppressor> transient_suppressor; 127 std::unique_ptr<TransientSuppressor> transient_suppressor;
126 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer; 128 std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
127 }; 129 };
128 130
129 struct AudioProcessingImpl::ApmPrivateSubmodules { 131 struct AudioProcessingImpl::ApmPrivateSubmodules {
130 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer) 132 explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
131 : beamformer(beamformer) {} 133 : beamformer(beamformer) {}
132 // Accessed internally from capture or during initialization 134 // Accessed internally from capture or during initialization
133 std::unique_ptr<Beamformer<float>> beamformer; 135 std::unique_ptr<Beamformer<float>> beamformer;
134 std::unique_ptr<AgcManagerDirect> agc_manager; 136 std::unique_ptr<AgcManagerDirect> agc_manager;
137 std::unique_ptr<LevelController> level_controller;
135 }; 138 };
136 139
137 AudioProcessing* AudioProcessing::Create() { 140 AudioProcessing* AudioProcessing::Create() {
138 Config config; 141 Config config;
139 return Create(config, nullptr); 142 return Create(config, nullptr);
140 } 143 }
141 144
142 AudioProcessing* AudioProcessing::Create(const Config& config) { 145 AudioProcessing* AudioProcessing::Create(const Config& config) {
143 return Create(config, nullptr); 146 return Create(config, nullptr);
144 } 147 }
(...skipping 23 matching lines...) Expand all
168 config.Get<ExperimentalAgc>().enabled), 171 config.Get<ExperimentalAgc>().enabled),
169 #endif 172 #endif
170 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) 173 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
171 capture_(false, 174 capture_(false,
172 #else 175 #else
173 capture_(config.Get<ExperimentalNs>().enabled, 176 capture_(config.Get<ExperimentalNs>().enabled,
174 #endif 177 #endif
175 config.Get<Beamforming>().array_geometry, 178 config.Get<Beamforming>().array_geometry,
176 config.Get<Beamforming>().target_direction), 179 config.Get<Beamforming>().target_direction),
177 capture_nonlocked_(config.Get<Beamforming>().enabled, 180 capture_nonlocked_(config.Get<Beamforming>().enabled,
178 config.Get<Intelligibility>().enabled) 181 config.Get<Intelligibility>().enabled,
179 { 182 config.Get<LevelControl>().enabled) {
180 { 183 {
181 rtc::CritScope cs_render(&crit_render_); 184 rtc::CritScope cs_render(&crit_render_);
182 rtc::CritScope cs_capture(&crit_capture_); 185 rtc::CritScope cs_capture(&crit_capture_);
183 186
184 public_submodules_->echo_cancellation.reset( 187 public_submodules_->echo_cancellation.reset(
185 new EchoCancellationImpl(&crit_render_, &crit_capture_)); 188 new EchoCancellationImpl(&crit_render_, &crit_capture_));
186 public_submodules_->echo_control_mobile.reset( 189 public_submodules_->echo_control_mobile.reset(
187 new EchoControlMobileImpl(&crit_render_, &crit_capture_)); 190 new EchoControlMobileImpl(&crit_render_, &crit_capture_));
188 public_submodules_->gain_control.reset( 191 public_submodules_->gain_control.reset(
189 new GainControlImpl(&crit_capture_, &crit_capture_)); 192 new GainControlImpl(&crit_capture_, &crit_capture_));
190 public_submodules_->high_pass_filter.reset( 193 public_submodules_->high_pass_filter.reset(
191 new HighPassFilterImpl(&crit_capture_)); 194 new HighPassFilterImpl(&crit_capture_));
192 public_submodules_->level_estimator.reset( 195 public_submodules_->level_estimator.reset(
193 new LevelEstimatorImpl(&crit_capture_)); 196 new LevelEstimatorImpl(&crit_capture_));
194 public_submodules_->noise_suppression.reset( 197 public_submodules_->noise_suppression.reset(
195 new NoiseSuppressionImpl(&crit_capture_)); 198 new NoiseSuppressionImpl(&crit_capture_));
196 public_submodules_->voice_detection.reset( 199 public_submodules_->voice_detection.reset(
197 new VoiceDetectionImpl(&crit_capture_)); 200 new VoiceDetectionImpl(&crit_capture_));
198 public_submodules_->gain_control_for_experimental_agc.reset( 201 public_submodules_->gain_control_for_experimental_agc.reset(
199 new GainControlForExperimentalAgc( 202 new GainControlForExperimentalAgc(
200 public_submodules_->gain_control.get(), &crit_capture_)); 203 public_submodules_->gain_control.get(), &crit_capture_));
204
205 private_submodules_->level_controller.reset(new LevelController());
201 } 206 }
202 207
203 SetExtraOptions(config); 208 SetExtraOptions(config);
204 } 209 }
205 210
206 AudioProcessingImpl::~AudioProcessingImpl() { 211 AudioProcessingImpl::~AudioProcessingImpl() {
207 // Depends on gain_control_ and 212 // Depends on gain_control_ and
208 // public_submodules_->gain_control_for_experimental_agc. 213 // public_submodules_->gain_control_for_experimental_agc.
209 private_submodules_->agc_manager.reset(); 214 private_submodules_->agc_manager.reset();
210 // Depends on gain_control_. 215 // Depends on gain_control_.
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 InitializeEchoCanceller(); 320 InitializeEchoCanceller();
316 InitializeEchoControlMobile(); 321 InitializeEchoControlMobile();
317 InitializeExperimentalAgc(); 322 InitializeExperimentalAgc();
318 InitializeTransient(); 323 InitializeTransient();
319 InitializeBeamformer(); 324 InitializeBeamformer();
320 InitializeIntelligibility(); 325 InitializeIntelligibility();
321 InitializeHighPassFilter(); 326 InitializeHighPassFilter();
322 InitializeNoiseSuppression(); 327 InitializeNoiseSuppression();
323 InitializeLevelEstimator(); 328 InitializeLevelEstimator();
324 InitializeVoiceDetection(); 329 InitializeVoiceDetection();
330 InitializeLevelController();
325 331
326 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 332 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
327 if (debug_dump_.debug_file->is_open()) { 333 if (debug_dump_.debug_file->is_open()) {
328 int err = WriteInitMessage(); 334 int err = WriteInitMessage();
329 if (err != kNoError) { 335 if (err != kNoError) {
330 return err; 336 return err;
331 } 337 }
332 } 338 }
333 #endif 339 #endif
334 340
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
401 407
402 public_submodules_->echo_cancellation->SetExtraOptions(config); 408 public_submodules_->echo_cancellation->SetExtraOptions(config);
403 409
404 if (capture_.transient_suppressor_enabled != 410 if (capture_.transient_suppressor_enabled !=
405 config.Get<ExperimentalNs>().enabled) { 411 config.Get<ExperimentalNs>().enabled) {
406 capture_.transient_suppressor_enabled = 412 capture_.transient_suppressor_enabled =
407 config.Get<ExperimentalNs>().enabled; 413 config.Get<ExperimentalNs>().enabled;
408 InitializeTransient(); 414 InitializeTransient();
409 } 415 }
410 416
417 if (capture_nonlocked_.level_controller_enabled !=
418 config.Get<LevelControl>().enabled) {
419 capture_nonlocked_.level_controller_enabled =
420 config.Get<LevelControl>().enabled;
421 InitializeLevelController();
422 }
423
411 if(capture_nonlocked_.intelligibility_enabled != 424 if(capture_nonlocked_.intelligibility_enabled !=
412 config.Get<Intelligibility>().enabled) { 425 config.Get<Intelligibility>().enabled) {
413 capture_nonlocked_.intelligibility_enabled = 426 capture_nonlocked_.intelligibility_enabled =
414 config.Get<Intelligibility>().enabled; 427 config.Get<Intelligibility>().enabled;
415 InitializeIntelligibility(); 428 InitializeIntelligibility();
416 } 429 }
417 430
418 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD 431 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
419 if (capture_nonlocked_.beamformer_enabled != 432 if (capture_nonlocked_.beamformer_enabled !=
420 config.Get<Beamforming>().enabled) { 433 config.Get<Beamforming>().enabled) {
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after
752 ? private_submodules_->agc_manager->voice_probability() 765 ? private_submodules_->agc_manager->voice_probability()
753 : 1.f; 766 : 1.f;
754 767
755 public_submodules_->transient_suppressor->Suppress( 768 public_submodules_->transient_suppressor->Suppress(
756 ca->channels_f()[0], ca->num_frames(), ca->num_channels(), 769 ca->channels_f()[0], ca->num_frames(), ca->num_channels(),
757 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(), 770 ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),
758 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability, 771 ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,
759 capture_.key_pressed); 772 capture_.key_pressed);
760 } 773 }
761 774
775 if (capture_nonlocked_.level_controller_enabled) {
776 private_submodules_->level_controller->Process(ca);
777 }
778
762 // The level estimator operates on the recombined data. 779 // The level estimator operates on the recombined data.
763 public_submodules_->level_estimator->ProcessStream(ca); 780 public_submodules_->level_estimator->ProcessStream(ca);
764 781
765 capture_.was_stream_delay_set = false; 782 capture_.was_stream_delay_set = false;
766 return kNoError; 783 return kNoError;
767 } 784 }
768 785
769 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, 786 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
770 size_t samples_per_channel, 787 size_t samples_per_channel,
771 int rev_sample_rate_hz, 788 int rev_sample_rate_hz,
(...skipping 339 matching lines...) Expand 10 before | Expand all | Expand 10 after
1111 } 1128 }
1112 1129
1113 // The capture data is otherwise unchanged. 1130 // The capture data is otherwise unchanged.
1114 return false; 1131 return false;
1115 } 1132 }
1116 1133
1117 bool AudioProcessingImpl::output_copy_needed() const { 1134 bool AudioProcessingImpl::output_copy_needed() const {
1118 // Check if we've upmixed or downmixed the audio. 1135 // Check if we've upmixed or downmixed the audio.
1119 return ((formats_.api_format.output_stream().num_channels() != 1136 return ((formats_.api_format.output_stream().num_channels() !=
1120 formats_.api_format.input_stream().num_channels()) || 1137 formats_.api_format.input_stream().num_channels()) ||
1121 is_fwd_processed() || capture_.transient_suppressor_enabled); 1138 is_fwd_processed() || capture_.transient_suppressor_enabled ||
1139 capture_nonlocked_.level_controller_enabled);
1122 } 1140 }
1123 1141
1124 bool AudioProcessingImpl::fwd_synthesis_needed() const { 1142 bool AudioProcessingImpl::fwd_synthesis_needed() const {
1125 return (is_fwd_processed() && 1143 return (is_fwd_processed() &&
1126 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz())); 1144 is_multi_band(capture_nonlocked_.fwd_proc_format.sample_rate_hz()));
1127 } 1145 }
1128 1146
1129 bool AudioProcessingImpl::fwd_analysis_needed() const { 1147 bool AudioProcessingImpl::fwd_analysis_needed() const {
1130 if (!is_fwd_processed() && 1148 if (!is_fwd_processed() &&
1131 !public_submodules_->voice_detection->is_enabled() && 1149 !public_submodules_->voice_detection->is_enabled() &&
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
1240 public_submodules_->echo_control_mobile->Initialize( 1258 public_submodules_->echo_control_mobile->Initialize(
1241 proc_split_sample_rate_hz(), 1259 proc_split_sample_rate_hz(),
1242 num_reverse_channels(), 1260 num_reverse_channels(),
1243 num_output_channels()); 1261 num_output_channels());
1244 } 1262 }
1245 1263
1246 void AudioProcessingImpl::InitializeLevelEstimator() { 1264 void AudioProcessingImpl::InitializeLevelEstimator() {
1247 public_submodules_->level_estimator->Initialize(); 1265 public_submodules_->level_estimator->Initialize();
1248 } 1266 }
1249 1267
1268 void AudioProcessingImpl::InitializeLevelController() {
1269 private_submodules_->level_controller->Initialize(proc_sample_rate_hz(),
1270 num_proc_channels());
1271 }
1272
1250 void AudioProcessingImpl::InitializeVoiceDetection() { 1273 void AudioProcessingImpl::InitializeVoiceDetection() {
1251 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); 1274 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
1252 } 1275 }
1253 1276
1254 void AudioProcessingImpl::MaybeUpdateHistograms() { 1277 void AudioProcessingImpl::MaybeUpdateHistograms() {
1255 static const int kMinDiffDelayMs = 60; 1278 static const int kMinDiffDelayMs = 60;
1256 1279
1257 if (echo_cancellation()->is_enabled()) { 1280 if (echo_cancellation()->is_enabled()) {
1258 // Activate delay_jumps_ counters if we know echo_cancellation is runnning. 1281 // Activate delay_jumps_ counters if we know echo_cancellation is runnning.
1259 // If a stream has echo we know that the echo_cancellation is in process. 1282 // If a stream has echo we know that the echo_cancellation is in process.
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
1455 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config); 1478 debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);
1456 1479
1457 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), 1480 RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
1458 &debug_dump_.num_bytes_left_for_log_, 1481 &debug_dump_.num_bytes_left_for_log_,
1459 &crit_debug_, &debug_dump_.capture)); 1482 &crit_debug_, &debug_dump_.capture));
1460 return kNoError; 1483 return kNoError;
1461 } 1484 }
1462 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP 1485 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1463 1486
1464 } // namespace webrtc 1487 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698