Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 10 matching lines...) Expand all
21 #include "webrtc/modules/audio_processing/aec/aec_core.h" 21 #include "webrtc/modules/audio_processing/aec/aec_core.h"
22 } 22 }
23 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" 23 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
24 #include "webrtc/modules/audio_processing/audio_buffer.h" 24 #include "webrtc/modules/audio_processing/audio_buffer.h"
25 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" 25 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
26 #include "webrtc/modules/audio_processing/common.h" 26 #include "webrtc/modules/audio_processing/common.h"
27 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h" 27 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
28 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" 28 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
29 #include "webrtc/modules/audio_processing/gain_control_impl.h" 29 #include "webrtc/modules/audio_processing/gain_control_impl.h"
30 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" 30 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
31 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
31 #include "webrtc/modules/audio_processing/level_estimator_impl.h" 32 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
32 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" 33 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
33 #include "webrtc/modules/audio_processing/processing_component.h" 34 #include "webrtc/modules/audio_processing/processing_component.h"
34 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" 35 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
35 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 36 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
36 #include "webrtc/modules/interface/module_common_types.h" 37 #include "webrtc/modules/interface/module_common_types.h"
37 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 38 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
38 #include "webrtc/system_wrappers/interface/file_wrapper.h" 39 #include "webrtc/system_wrappers/interface/file_wrapper.h"
39 #include "webrtc/system_wrappers/interface/logging.h" 40 #include "webrtc/system_wrappers/interface/logging.h"
40 #include "webrtc/system_wrappers/interface/metrics.h" 41 #include "webrtc/system_wrappers/interface/metrics.h"
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 #endif 189 #endif
189 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume), 190 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume),
190 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) 191 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
191 transient_suppressor_enabled_(false), 192 transient_suppressor_enabled_(false),
192 #else 193 #else
193 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), 194 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
194 #endif 195 #endif
195 beamformer_enabled_(config.Get<Beamforming>().enabled), 196 beamformer_enabled_(config.Get<Beamforming>().enabled),
196 beamformer_(beamformer), 197 beamformer_(beamformer),
197 array_geometry_(config.Get<Beamforming>().array_geometry), 198 array_geometry_(config.Get<Beamforming>().array_geometry),
198 supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled) { 199 supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled),
200 intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
199 echo_cancellation_ = new EchoCancellationImpl(this, crit_); 201 echo_cancellation_ = new EchoCancellationImpl(this, crit_);
200 component_list_.push_back(echo_cancellation_); 202 component_list_.push_back(echo_cancellation_);
201 203
202 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_); 204 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_);
203 component_list_.push_back(echo_control_mobile_); 205 component_list_.push_back(echo_control_mobile_);
204 206
205 gain_control_ = new GainControlImpl(this, crit_); 207 gain_control_ = new GainControlImpl(this, crit_);
206 component_list_.push_back(gain_control_); 208 component_list_.push_back(gain_control_);
207 209
208 high_pass_filter_ = new HighPassFilterImpl(this, crit_); 210 high_pass_filter_ = new HighPassFilterImpl(this, crit_);
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
298 return err; 300 return err;
299 } 301 }
300 } 302 }
301 303
302 InitializeExperimentalAgc(); 304 InitializeExperimentalAgc();
303 305
304 InitializeTransient(); 306 InitializeTransient();
305 307
306 InitializeBeamformer(); 308 InitializeBeamformer();
307 309
310 InitializeIntelligibility();
311
308 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 312 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
309 if (debug_file_->Open()) { 313 if (debug_file_->Open()) {
310 int err = WriteInitMessage(); 314 int err = WriteInitMessage();
311 if (err != kNoError) { 315 if (err != kNoError) {
312 return err; 316 return err;
313 } 317 }
314 } 318 }
315 #endif 319 #endif
316 320
317 return kNoError; 321 return kNoError;
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
420 void AudioProcessingImpl::SetExtraOptions(const Config& config) { 424 void AudioProcessingImpl::SetExtraOptions(const Config& config) {
421 CriticalSectionScoped crit_scoped(crit_); 425 CriticalSectionScoped crit_scoped(crit_);
422 for (auto item : component_list_) { 426 for (auto item : component_list_) {
423 item->SetExtraOptions(config); 427 item->SetExtraOptions(config);
424 } 428 }
425 429
426 if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) { 430 if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
427 transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled; 431 transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
428 InitializeTransient(); 432 InitializeTransient();
429 } 433 }
434
435 if (intelligibility_enabled_ != config.Get<Intelligibility>().enabled) {
436 intelligibility_enabled_ = config.Get<Intelligibility>().enabled;
437 InitializeIntelligibility();
438 }
430 } 439 }
431 440
432 int AudioProcessingImpl::input_sample_rate_hz() const { 441 int AudioProcessingImpl::input_sample_rate_hz() const {
433 CriticalSectionScoped crit_scoped(crit_); 442 CriticalSectionScoped crit_scoped(crit_);
434 return fwd_in_format_.rate(); 443 return fwd_in_format_.rate();
435 } 444 }
436 445
437 int AudioProcessingImpl::sample_rate_hz() const { 446 int AudioProcessingImpl::sample_rate_hz() const {
438 CriticalSectionScoped crit_scoped(crit_); 447 CriticalSectionScoped crit_scoped(crit_);
439 return fwd_in_format_.rate(); 448 return fwd_in_format_.rate();
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 msg->set_delay(stream_delay_ms_); 601 msg->set_delay(stream_delay_ms_);
593 msg->set_drift(echo_cancellation_->stream_drift_samples()); 602 msg->set_drift(echo_cancellation_->stream_drift_samples());
594 msg->set_level(gain_control()->stream_analog_level()); 603 msg->set_level(gain_control()->stream_analog_level());
595 msg->set_keypress(key_pressed_); 604 msg->set_keypress(key_pressed_);
596 } 605 }
597 #endif 606 #endif
598 607
599 MaybeUpdateHistograms(); 608 MaybeUpdateHistograms();
600 609
601 AudioBuffer* ca = capture_audio_.get(); // For brevity. 610 AudioBuffer* ca = capture_audio_.get(); // For brevity.
611
612 if (intelligibility_enabled_) {
613 float voice_probability =
614 agc_manager_.get() ? agc_manager_->voice_probability() : 0.f;
turaj 2015/07/14 18:28:51 Shouldn't we turn activity detector part of AGC on
aluebs-webrtc 2015/07/15 01:02:04 Also, to avoid being out of sync, this should prob
ekm 2015/07/17 19:59:38 To avoid these problems, and simplify things, swit
615 intelligibility_enhancer_->ProcessCaptureAudio(
616 ca->split_channels_f(kBand0To8kHz), voice_probability);
617 }
618
602 if (use_new_agc_ && gain_control_->is_enabled()) { 619 if (use_new_agc_ && gain_control_->is_enabled()) {
603 agc_manager_->AnalyzePreProcess(ca->channels()[0], 620 agc_manager_->AnalyzePreProcess(ca->channels()[0],
604 ca->num_channels(), 621 ca->num_channels(),
605 fwd_proc_format_.samples_per_channel()); 622 fwd_proc_format_.samples_per_channel());
606 } 623 }
607 624
608 bool data_processed = is_data_processed(); 625 bool data_processed = is_data_processed();
609 if (analysis_needed(data_processed)) { 626 if (analysis_needed(data_processed)) {
610 ca->SplitIntoFrequencyBands(); 627 ca->SplitIntoFrequencyBands();
611 } 628 }
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
657 key_pressed_); 674 key_pressed_);
658 } 675 }
659 676
660 // The level estimator operates on the recombined data. 677 // The level estimator operates on the recombined data.
661 RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); 678 RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
662 679
663 was_stream_delay_set_ = false; 680 was_stream_delay_set_ = false;
664 return kNoError; 681 return kNoError;
665 } 682 }
666 683
667 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, 684 int AudioProcessingImpl::AnalyzeReverseStream(float* const* data,
668 int samples_per_channel, 685 int samples_per_channel,
669 int sample_rate_hz, 686 int sample_rate_hz,
670 ChannelLayout layout) { 687 ChannelLayout layout) {
671 CriticalSectionScoped crit_scoped(crit_); 688 CriticalSectionScoped crit_scoped(crit_);
672 if (data == NULL) { 689 if (data == NULL) {
673 return kNullPointerError; 690 return kNullPointerError;
674 } 691 }
675 692
676 const int num_channels = ChannelsFromLayout(layout); 693 const int num_channels = ChannelsFromLayout(layout);
677 RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), 694 RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
(...skipping 12 matching lines...) Expand all
690 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 707 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
691 const size_t channel_size = 708 const size_t channel_size =
692 sizeof(float) * rev_in_format_.samples_per_channel(); 709 sizeof(float) * rev_in_format_.samples_per_channel();
693 for (int i = 0; i < num_channels; ++i) 710 for (int i = 0; i < num_channels; ++i)
694 msg->add_channel(data[i], channel_size); 711 msg->add_channel(data[i], channel_size);
695 RETURN_ON_ERR(WriteMessageToDebugFile()); 712 RETURN_ON_ERR(WriteMessageToDebugFile());
696 } 713 }
697 #endif 714 #endif
698 715
699 render_audio_->CopyFrom(data, samples_per_channel, layout); 716 render_audio_->CopyFrom(data, samples_per_channel, layout);
700 return AnalyzeReverseStreamLocked(); 717 RETURN_ON_ERR(AnalyzeReverseStreamLocked());
718 if (intelligibility_enabled_) {
719 render_audio_->CopyTo(samples_per_channel, layout, data);
720 }
721
722 return kNoError;
701 } 723 }
702 724
703 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { 725 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
704 CriticalSectionScoped crit_scoped(crit_); 726 CriticalSectionScoped crit_scoped(crit_);
705 if (frame == NULL) { 727 if (frame == NULL) {
706 return kNullPointerError; 728 return kNullPointerError;
707 } 729 }
708 // Must be a native rate. 730 // Must be a native rate.
709 if (frame->sample_rate_hz_ != kSampleRate8kHz && 731 if (frame->sample_rate_hz_ != kSampleRate8kHz &&
710 frame->sample_rate_hz_ != kSampleRate16kHz && 732 frame->sample_rate_hz_ != kSampleRate16kHz &&
(...skipping 22 matching lines...) Expand all
733 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 755 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
734 const size_t data_size = sizeof(int16_t) * 756 const size_t data_size = sizeof(int16_t) *
735 frame->samples_per_channel_ * 757 frame->samples_per_channel_ *
736 frame->num_channels_; 758 frame->num_channels_;
737 msg->set_data(frame->data_, data_size); 759 msg->set_data(frame->data_, data_size);
738 RETURN_ON_ERR(WriteMessageToDebugFile()); 760 RETURN_ON_ERR(WriteMessageToDebugFile());
739 } 761 }
740 #endif 762 #endif
741 763
742 render_audio_->DeinterleaveFrom(frame); 764 render_audio_->DeinterleaveFrom(frame);
743 return AnalyzeReverseStreamLocked(); 765 RETURN_ON_ERR(AnalyzeReverseStreamLocked());
766 if (intelligibility_enabled_) {
aluebs-webrtc 2015/07/15 01:02:04 You don't need this if statement, because Interlea
ekm 2015/07/17 19:59:38 Done. As a result, I updated audio_buffer.Interlea
aluebs-webrtc 2015/07/20 19:33:42 Ack
767 render_audio_->InterleaveTo(frame, intelligibility_enabled_);
768 }
769
770 return kNoError;
744 } 771 }
745 772
746 int AudioProcessingImpl::AnalyzeReverseStreamLocked() { 773 int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
747 AudioBuffer* ra = render_audio_.get(); // For brevity. 774 AudioBuffer* ra = render_audio_.get(); // For brevity.
748 if (rev_proc_format_.rate() == kSampleRate32kHz) { 775 if (rev_proc_format_.rate() == kSampleRate32kHz) {
749 ra->SplitIntoFrequencyBands(); 776 ra->SplitIntoFrequencyBands();
750 } 777 }
751 778
752 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); 779 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
753 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); 780 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
754 if (!use_new_agc_) { 781 if (!use_new_agc_) {
755 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); 782 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
756 } 783 }
757 784
785 if (intelligibility_enabled_) {
turaj 2015/07/14 18:28:51 I suppose we want to do all the modifications to p
ekm 2015/07/17 19:59:37 Done.
786 intelligibility_enhancer_->ProcessRenderAudio(
787 ra->split_channels_f(kBand0To8kHz));
aluebs-webrtc 2015/07/15 01:02:04 Maybe not for this CL, but at some point this need
ekm 2015/07/17 19:59:38 Acknowledged. I think we'll save this for a later
aluebs-webrtc 2015/07/20 19:33:42 Agreed on leaving for another CL. I don't think an
ekm 2015/07/21 19:22:13 Acknowledged.
788 }
789
758 return kNoError; 790 return kNoError;
759 } 791 }
760 792
761 int AudioProcessingImpl::set_stream_delay_ms(int delay) { 793 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
762 Error retval = kNoError; 794 Error retval = kNoError;
763 was_stream_delay_set_ = true; 795 was_stream_delay_set_ = true;
764 delay += delay_offset_ms_; 796 delay += delay_offset_ms_;
765 797
766 if (delay < 0) { 798 if (delay < 0) {
767 delay = 0; 799 delay = 0;
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
994 1026
995 void AudioProcessingImpl::InitializeBeamformer() { 1027 void AudioProcessingImpl::InitializeBeamformer() {
996 if (beamformer_enabled_) { 1028 if (beamformer_enabled_) {
997 if (!beamformer_) { 1029 if (!beamformer_) {
998 beamformer_.reset(new NonlinearBeamformer(array_geometry_)); 1030 beamformer_.reset(new NonlinearBeamformer(array_geometry_));
999 } 1031 }
1000 beamformer_->Initialize(kChunkSizeMs, split_rate_); 1032 beamformer_->Initialize(kChunkSizeMs, split_rate_);
1001 } 1033 }
1002 } 1034 }
1003 1035
1036 void AudioProcessingImpl::InitializeIntelligibility() {
1037 if (intelligibility_enabled_) {
1038 if (!intelligibility_enhancer_) {
aluebs-webrtc 2015/07/15 01:02:04 We probably want to reset the intelligibility_enha
ekm 2015/07/17 19:59:38 Done.
1039 IntelligibilityEnhancer::Config config;
1040 config.sample_rate_hz = split_rate_;
1041 config.channels = fwd_in_format_.num_channels();
aluebs-webrtc 2015/07/15 01:02:04 Shouldn't this be fwd_proc_format_?
ekm 2015/07/17 19:59:38 Just set it to single channel for now, since that'
aluebs-webrtc 2015/07/20 19:33:42 Then I think it is better to set the correct value
ekm 2015/07/21 19:22:13 I agree. The problem was that the enhancer only ha
aluebs-webrtc 2015/07/21 21:30:22 This is better.
1042 intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
1043 }
1044 }
1045 }
1046
1004 void AudioProcessingImpl::MaybeUpdateHistograms() { 1047 void AudioProcessingImpl::MaybeUpdateHistograms() {
1005 static const int kMinDiffDelayMs = 60; 1048 static const int kMinDiffDelayMs = 60;
1006 1049
1007 if (echo_cancellation()->is_enabled()) { 1050 if (echo_cancellation()->is_enabled()) {
1008 // Activate delay_jumps_ counters if we know echo_cancellation is running. 1051 // Activate delay_jumps_ counters if we know echo_cancellation is running.
1009 // If a stream has echo we know that the echo_cancellation is in process. 1052 // If a stream has echo we know that the echo_cancellation is in process.
1010 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) { 1053 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) {
1011 stream_delay_jumps_ = 0; 1054 stream_delay_jumps_ = 0;
1012 } 1055 }
1013 if (aec_system_delay_jumps_ == -1 && 1056 if (aec_system_delay_jumps_ == -1 &&
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1106 int err = WriteMessageToDebugFile(); 1149 int err = WriteMessageToDebugFile();
1107 if (err != kNoError) { 1150 if (err != kNoError) {
1108 return err; 1151 return err;
1109 } 1152 }
1110 1153
1111 return kNoError; 1154 return kNoError;
1112 } 1155 }
1113 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP 1156 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1114 1157
1115 } // namespace webrtc 1158 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698