Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(100)

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Addressed comments from Patch Set 7 Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 #include "webrtc/modules/audio_processing/aec/aec_core.h" 22 #include "webrtc/modules/audio_processing/aec/aec_core.h"
23 } 23 }
24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" 24 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
25 #include "webrtc/modules/audio_processing/audio_buffer.h" 25 #include "webrtc/modules/audio_processing/audio_buffer.h"
26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" 26 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
27 #include "webrtc/modules/audio_processing/common.h" 27 #include "webrtc/modules/audio_processing/common.h"
28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h" 28 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" 29 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
30 #include "webrtc/modules/audio_processing/gain_control_impl.h" 30 #include "webrtc/modules/audio_processing/gain_control_impl.h"
31 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" 31 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
32 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
32 #include "webrtc/modules/audio_processing/level_estimator_impl.h" 33 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
33 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" 34 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
34 #include "webrtc/modules/audio_processing/processing_component.h" 35 #include "webrtc/modules/audio_processing/processing_component.h"
35 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" 36 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
36 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 37 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
37 #include "webrtc/modules/interface/module_common_types.h" 38 #include "webrtc/modules/interface/module_common_types.h"
38 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 39 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
39 #include "webrtc/system_wrappers/interface/file_wrapper.h" 40 #include "webrtc/system_wrappers/interface/file_wrapper.h"
40 #include "webrtc/system_wrappers/interface/logging.h" 41 #include "webrtc/system_wrappers/interface/logging.h"
41 #include "webrtc/system_wrappers/interface/metrics.h" 42 #include "webrtc/system_wrappers/interface/metrics.h"
(...skipping 161 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 use_new_agc_(config.Get<ExperimentalAgc>().enabled), 204 use_new_agc_(config.Get<ExperimentalAgc>().enabled),
204 #endif 205 #endif
205 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume), 206 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume),
206 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) 207 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
207 transient_suppressor_enabled_(false), 208 transient_suppressor_enabled_(false),
208 #else 209 #else
209 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), 210 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
210 #endif 211 #endif
211 beamformer_enabled_(config.Get<Beamforming>().enabled), 212 beamformer_enabled_(config.Get<Beamforming>().enabled),
212 beamformer_(beamformer), 213 beamformer_(beamformer),
213 array_geometry_(config.Get<Beamforming>().array_geometry) { 214 array_geometry_(config.Get<Beamforming>().array_geometry),
215 intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
214 echo_cancellation_ = new EchoCancellationImpl(this, crit_); 216 echo_cancellation_ = new EchoCancellationImpl(this, crit_);
215 component_list_.push_back(echo_cancellation_); 217 component_list_.push_back(echo_cancellation_);
216 218
217 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_); 219 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_);
218 component_list_.push_back(echo_control_mobile_); 220 component_list_.push_back(echo_control_mobile_);
219 221
220 gain_control_ = new GainControlImpl(this, crit_); 222 gain_control_ = new GainControlImpl(this, crit_);
221 component_list_.push_back(gain_control_); 223 component_list_.push_back(gain_control_);
222 224
223 high_pass_filter_ = new HighPassFilterImpl(this, crit_); 225 high_pass_filter_ = new HighPassFilterImpl(this, crit_);
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
322 return err; 324 return err;
323 } 325 }
324 } 326 }
325 327
326 InitializeExperimentalAgc(); 328 InitializeExperimentalAgc();
327 329
328 InitializeTransient(); 330 InitializeTransient();
329 331
330 InitializeBeamformer(); 332 InitializeBeamformer();
331 333
334 InitializeIntelligibility();
335
332 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 336 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
333 if (debug_file_->Open()) { 337 if (debug_file_->Open()) {
334 int err = WriteInitMessage(); 338 int err = WriteInitMessage();
335 if (err != kNoError) { 339 if (err != kNoError) {
336 return err; 340 return err;
337 } 341 }
338 } 342 }
339 #endif 343 #endif
340 344
341 return kNoError; 345 return kNoError;
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after
616 msg->set_delay(stream_delay_ms_); 620 msg->set_delay(stream_delay_ms_);
617 msg->set_drift(echo_cancellation_->stream_drift_samples()); 621 msg->set_drift(echo_cancellation_->stream_drift_samples());
618 msg->set_level(gain_control()->stream_analog_level()); 622 msg->set_level(gain_control()->stream_analog_level());
619 msg->set_keypress(key_pressed_); 623 msg->set_keypress(key_pressed_);
620 } 624 }
621 #endif 625 #endif
622 626
623 MaybeUpdateHistograms(); 627 MaybeUpdateHistograms();
624 628
625 AudioBuffer* ca = capture_audio_.get(); // For brevity. 629 AudioBuffer* ca = capture_audio_.get(); // For brevity.
630
626 if (use_new_agc_ && gain_control_->is_enabled()) { 631 if (use_new_agc_ && gain_control_->is_enabled()) {
627 agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), 632 agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
628 fwd_proc_format_.num_frames()); 633 fwd_proc_format_.num_frames());
629 } 634 }
630 635
631 bool data_processed = is_data_processed(); 636 bool data_processed = is_data_processed();
632 if (analysis_needed(data_processed)) { 637 if (analysis_needed(data_processed)) {
633 ca->SplitIntoFrequencyBands(); 638 ca->SplitIntoFrequencyBands();
634 } 639 }
635 640
641 if (intelligibility_enabled_) {
642 intelligibility_enhancer_->AnalyzeCaptureAudio(
643 ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
644 }
645
636 if (beamformer_enabled_) { 646 if (beamformer_enabled_) {
637 beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); 647 beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
638 ca->set_num_channels(1); 648 ca->set_num_channels(1);
639 } 649 }
640 650
641 RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca)); 651 RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
642 RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca)); 652 RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
643 RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca)); 653 RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca));
644 RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca)); 654 RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
645 655
(...skipping 28 matching lines...) Expand all
674 key_pressed_); 684 key_pressed_);
675 } 685 }
676 686
677 // The level estimator operates on the recombined data. 687 // The level estimator operates on the recombined data.
678 RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); 688 RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
679 689
680 was_stream_delay_set_ = false; 690 was_stream_delay_set_ = false;
681 return kNoError; 691 return kNoError;
682 } 692 }
683 693
694 int AudioProcessingImpl::ProcessReverseStream(float* const* data,
695 int samples_per_channel,
696 int rev_sample_rate_hz,
697 ChannelLayout layout) {
698 RETURN_ON_ERR(AnalyzeReverseStream(data, samples_per_channel,
699 rev_sample_rate_hz, layout));
700 if (is_rev_processed()) {
701 render_audio_->CopyTo(api_format_.reverse_stream(), data);
702 }
703
704 return kNoError;
705 }
706
684 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, 707 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
685 int samples_per_channel, 708 int samples_per_channel,
686 int sample_rate_hz, 709 int rev_sample_rate_hz,
687 ChannelLayout layout) { 710 ChannelLayout layout) {
688 const StreamConfig reverse_config = { 711 const StreamConfig reverse_config = {
689 sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), 712 rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
690 }; 713 };
691 if (samples_per_channel != reverse_config.num_frames()) { 714 if (samples_per_channel != reverse_config.num_frames()) {
692 return kBadDataLengthError; 715 return kBadDataLengthError;
693 } 716 }
694 return AnalyzeReverseStream(data, reverse_config); 717 return AnalyzeReverseStream(data, reverse_config);
695 } 718 }
696 719
720 int AudioProcessingImpl::ProcessReverseStream(
721 float* const* data,
722 const StreamConfig& reverse_config) {
723 RETURN_ON_ERR(AnalyzeReverseStream(data, reverse_config));
724 if (is_rev_processed()) {
725 render_audio_->CopyTo(api_format_.reverse_stream(), data);
726 }
727
728 return kNoError;
729 }
730
697 int AudioProcessingImpl::AnalyzeReverseStream( 731 int AudioProcessingImpl::AnalyzeReverseStream(
698 const float* const* data, 732 const float* const* data,
699 const StreamConfig& reverse_config) { 733 const StreamConfig& reverse_config) {
700 CriticalSectionScoped crit_scoped(crit_); 734 CriticalSectionScoped crit_scoped(crit_);
701 if (data == NULL) { 735 if (data == NULL) {
702 return kNullPointerError; 736 return kNullPointerError;
703 } 737 }
704 738
705 if (reverse_config.num_channels() <= 0) { 739 if (reverse_config.num_channels() <= 0) {
706 return kBadNumberChannelsError; 740 return kBadNumberChannelsError;
(...skipping 12 matching lines...) Expand all
719 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 753 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
720 const size_t channel_size = 754 const size_t channel_size =
721 sizeof(float) * api_format_.reverse_stream().num_frames(); 755 sizeof(float) * api_format_.reverse_stream().num_frames();
722 for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i) 756 for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
723 msg->add_channel(data[i], channel_size); 757 msg->add_channel(data[i], channel_size);
724 RETURN_ON_ERR(WriteMessageToDebugFile()); 758 RETURN_ON_ERR(WriteMessageToDebugFile());
725 } 759 }
726 #endif 760 #endif
727 761
728 render_audio_->CopyFrom(data, api_format_.reverse_stream()); 762 render_audio_->CopyFrom(data, api_format_.reverse_stream());
729 return AnalyzeReverseStreamLocked(); 763 return ProcessReverseStreamLocked();
764 }
765
766 int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
767 RETURN_ON_ERR(AnalyzeReverseStream(frame));
768 if (is_rev_processed()) {
769 render_audio_->InterleaveTo(frame, true);
770 }
771
772 return kNoError;
730 } 773 }
731 774
732 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { 775 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
733 CriticalSectionScoped crit_scoped(crit_); 776 CriticalSectionScoped crit_scoped(crit_);
734 if (frame == NULL) { 777 if (frame == NULL) {
735 return kNullPointerError; 778 return kNullPointerError;
736 } 779 }
737 // Must be a native rate. 780 // Must be a native rate.
738 if (frame->sample_rate_hz_ != kSampleRate8kHz && 781 if (frame->sample_rate_hz_ != kSampleRate8kHz &&
739 frame->sample_rate_hz_ != kSampleRate16kHz && 782 frame->sample_rate_hz_ != kSampleRate16kHz &&
(...skipping 23 matching lines...) Expand all
763 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 806 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
764 if (debug_file_->Open()) { 807 if (debug_file_->Open()) {
765 event_msg_->set_type(audioproc::Event::REVERSE_STREAM); 808 event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
766 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 809 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
767 const size_t data_size = 810 const size_t data_size =
768 sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; 811 sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
769 msg->set_data(frame->data_, data_size); 812 msg->set_data(frame->data_, data_size);
770 RETURN_ON_ERR(WriteMessageToDebugFile()); 813 RETURN_ON_ERR(WriteMessageToDebugFile());
771 } 814 }
772 #endif 815 #endif
773
774 render_audio_->DeinterleaveFrom(frame); 816 render_audio_->DeinterleaveFrom(frame);
775 return AnalyzeReverseStreamLocked(); 817 return ProcessReverseStreamLocked();
776 } 818 }
777 819
778 int AudioProcessingImpl::AnalyzeReverseStreamLocked() { 820 int AudioProcessingImpl::ProcessReverseStreamLocked() {
779 AudioBuffer* ra = render_audio_.get(); // For brevity. 821 AudioBuffer* ra = render_audio_.get(); // For brevity.
780 if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { 822 if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
781 ra->SplitIntoFrequencyBands(); 823 ra->SplitIntoFrequencyBands();
782 } 824 }
783 825
826 if (intelligibility_enabled_) {
827 intelligibility_enhancer_->ProcessRenderAudio(
828 ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
829 }
830
784 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); 831 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
785 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); 832 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
786 if (!use_new_agc_) { 833 if (!use_new_agc_) {
787 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); 834 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
788 } 835 }
789 836
837 if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz &&
838 is_rev_processed()) {
839 ra->MergeFrequencyBands();
840 }
841
790 return kNoError; 842 return kNoError;
791 } 843 }
792 844
793 int AudioProcessingImpl::set_stream_delay_ms(int delay) { 845 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
794 Error retval = kNoError; 846 Error retval = kNoError;
795 was_stream_delay_set_ = true; 847 was_stream_delay_set_ = true;
796 delay += delay_offset_ms_; 848 delay += delay_offset_ms_;
797 849
798 if (delay < 0) { 850 if (delay < 0) {
799 delay = 0; 851 delay = 0;
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
996 // Only level_estimator_ is enabled. 1048 // Only level_estimator_ is enabled.
997 return false; 1049 return false;
998 } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || 1050 } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
999 fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { 1051 fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) {
1000 // Something besides level_estimator_ is enabled, and we have super-wb. 1052 // Something besides level_estimator_ is enabled, and we have super-wb.
1001 return true; 1053 return true;
1002 } 1054 }
1003 return false; 1055 return false;
1004 } 1056 }
1005 1057
1058 bool AudioProcessingImpl::is_rev_processed() const {
1059 return intelligibility_enabled_ && intelligibility_enhancer_->active();
1060 }
1061
1006 void AudioProcessingImpl::InitializeExperimentalAgc() { 1062 void AudioProcessingImpl::InitializeExperimentalAgc() {
1007 if (use_new_agc_) { 1063 if (use_new_agc_) {
1008 if (!agc_manager_.get()) { 1064 if (!agc_manager_.get()) {
1009 agc_manager_.reset(new AgcManagerDirect(gain_control_, 1065 agc_manager_.reset(new AgcManagerDirect(gain_control_,
1010 gain_control_for_new_agc_.get(), 1066 gain_control_for_new_agc_.get(),
1011 agc_startup_min_volume_)); 1067 agc_startup_min_volume_));
1012 } 1068 }
1013 agc_manager_->Initialize(); 1069 agc_manager_->Initialize();
1014 agc_manager_->SetCaptureMuted(output_will_be_muted_); 1070 agc_manager_->SetCaptureMuted(output_will_be_muted_);
1015 } 1071 }
(...skipping 12 matching lines...) Expand all
1028 1084
1029 void AudioProcessingImpl::InitializeBeamformer() { 1085 void AudioProcessingImpl::InitializeBeamformer() {
1030 if (beamformer_enabled_) { 1086 if (beamformer_enabled_) {
1031 if (!beamformer_) { 1087 if (!beamformer_) {
1032 beamformer_.reset(new NonlinearBeamformer(array_geometry_)); 1088 beamformer_.reset(new NonlinearBeamformer(array_geometry_));
1033 } 1089 }
1034 beamformer_->Initialize(kChunkSizeMs, split_rate_); 1090 beamformer_->Initialize(kChunkSizeMs, split_rate_);
1035 } 1091 }
1036 } 1092 }
1037 1093
1094 void AudioProcessingImpl::InitializeIntelligibility() {
1095 if (intelligibility_enabled_) {
1096 IntelligibilityEnhancer::Config config;
1097 config.sample_rate_hz = split_rate_;
1098 config.num_capture_channels = capture_audio_->num_channels();
1099 config.num_render_channels = render_audio_->num_channels();
1100 intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
1101 }
1102 }
1103
1038 void AudioProcessingImpl::MaybeUpdateHistograms() { 1104 void AudioProcessingImpl::MaybeUpdateHistograms() {
1039 static const int kMinDiffDelayMs = 60; 1105 static const int kMinDiffDelayMs = 60;
1040 1106
1041 if (echo_cancellation()->is_enabled()) { 1107 if (echo_cancellation()->is_enabled()) {
1042 // Activate delay_jumps_ counters if we know echo_cancellation is running. 1108 // Activate delay_jumps_ counters if we know echo_cancellation is running.
1043 // If a stream has echo we know that the echo_cancellation is in process. 1109 // If a stream has echo we know that the echo_cancellation is in process.
1044 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) { 1110 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) {
1045 stream_delay_jumps_ = 0; 1111 stream_delay_jumps_ = 0;
1046 } 1112 }
1047 if (aec_system_delay_jumps_ == -1 && 1113 if (aec_system_delay_jumps_ == -1 &&
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1140 int err = WriteMessageToDebugFile(); 1206 int err = WriteMessageToDebugFile();
1141 if (err != kNoError) { 1207 if (err != kNoError) {
1142 return err; 1208 return err;
1143 } 1209 }
1144 1210
1145 return kNoError; 1211 return kNoError;
1146 } 1212 }
1147 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP 1213 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1148 1214
1149 } // namespace webrtc 1215 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698