Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(843)

Side by Side Diff: webrtc/modules/audio_processing/audio_processing_impl.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Fixed memcpy Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 10 matching lines...) Expand all
21 #include "webrtc/modules/audio_processing/aec/aec_core.h" 21 #include "webrtc/modules/audio_processing/aec/aec_core.h"
22 } 22 }
23 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" 23 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
24 #include "webrtc/modules/audio_processing/audio_buffer.h" 24 #include "webrtc/modules/audio_processing/audio_buffer.h"
25 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" 25 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
26 #include "webrtc/modules/audio_processing/common.h" 26 #include "webrtc/modules/audio_processing/common.h"
27 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h" 27 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
28 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" 28 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
29 #include "webrtc/modules/audio_processing/gain_control_impl.h" 29 #include "webrtc/modules/audio_processing/gain_control_impl.h"
30 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" 30 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
31 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
31 #include "webrtc/modules/audio_processing/level_estimator_impl.h" 32 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
32 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" 33 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
33 #include "webrtc/modules/audio_processing/processing_component.h" 34 #include "webrtc/modules/audio_processing/processing_component.h"
34 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" 35 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
35 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 36 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
36 #include "webrtc/modules/interface/module_common_types.h" 37 #include "webrtc/modules/interface/module_common_types.h"
37 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" 38 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
38 #include "webrtc/system_wrappers/interface/file_wrapper.h" 39 #include "webrtc/system_wrappers/interface/file_wrapper.h"
39 #include "webrtc/system_wrappers/interface/logging.h" 40 #include "webrtc/system_wrappers/interface/logging.h"
40 #include "webrtc/system_wrappers/interface/metrics.h" 41 #include "webrtc/system_wrappers/interface/metrics.h"
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 #endif 189 #endif
189 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume), 190 agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume),
190 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) 191 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
191 transient_suppressor_enabled_(false), 192 transient_suppressor_enabled_(false),
192 #else 193 #else
193 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), 194 transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
194 #endif 195 #endif
195 beamformer_enabled_(config.Get<Beamforming>().enabled), 196 beamformer_enabled_(config.Get<Beamforming>().enabled),
196 beamformer_(beamformer), 197 beamformer_(beamformer),
197 array_geometry_(config.Get<Beamforming>().array_geometry), 198 array_geometry_(config.Get<Beamforming>().array_geometry),
198 supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled) { 199 supports_48kHz_(config.Get<AudioProcessing48kHzSupport>().enabled),
200 intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
199 echo_cancellation_ = new EchoCancellationImpl(this, crit_); 201 echo_cancellation_ = new EchoCancellationImpl(this, crit_);
200 component_list_.push_back(echo_cancellation_); 202 component_list_.push_back(echo_cancellation_);
201 203
202 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_); 204 echo_control_mobile_ = new EchoControlMobileImpl(this, crit_);
203 component_list_.push_back(echo_control_mobile_); 205 component_list_.push_back(echo_control_mobile_);
204 206
205 gain_control_ = new GainControlImpl(this, crit_); 207 gain_control_ = new GainControlImpl(this, crit_);
206 component_list_.push_back(gain_control_); 208 component_list_.push_back(gain_control_);
207 209
208 high_pass_filter_ = new HighPassFilterImpl(this, crit_); 210 high_pass_filter_ = new HighPassFilterImpl(this, crit_);
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
298 return err; 300 return err;
299 } 301 }
300 } 302 }
301 303
302 InitializeExperimentalAgc(); 304 InitializeExperimentalAgc();
303 305
304 InitializeTransient(); 306 InitializeTransient();
305 307
306 InitializeBeamformer(); 308 InitializeBeamformer();
307 309
310 InitializeIntelligibility();
311
308 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 312 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
309 if (debug_file_->Open()) { 313 if (debug_file_->Open()) {
310 int err = WriteInitMessage(); 314 int err = WriteInitMessage();
311 if (err != kNoError) { 315 if (err != kNoError) {
312 return err; 316 return err;
313 } 317 }
314 } 318 }
315 #endif 319 #endif
316 320
317 return kNoError; 321 return kNoError;
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 msg->set_delay(stream_delay_ms_); 596 msg->set_delay(stream_delay_ms_);
593 msg->set_drift(echo_cancellation_->stream_drift_samples()); 597 msg->set_drift(echo_cancellation_->stream_drift_samples());
594 msg->set_level(gain_control()->stream_analog_level()); 598 msg->set_level(gain_control()->stream_analog_level());
595 msg->set_keypress(key_pressed_); 599 msg->set_keypress(key_pressed_);
596 } 600 }
597 #endif 601 #endif
598 602
599 MaybeUpdateHistograms(); 603 MaybeUpdateHistograms();
600 604
601 AudioBuffer* ca = capture_audio_.get(); // For brevity. 605 AudioBuffer* ca = capture_audio_.get(); // For brevity.
606
602 if (use_new_agc_ && gain_control_->is_enabled()) { 607 if (use_new_agc_ && gain_control_->is_enabled()) {
603 agc_manager_->AnalyzePreProcess(ca->channels()[0], 608 agc_manager_->AnalyzePreProcess(ca->channels()[0],
604 ca->num_channels(), 609 ca->num_channels(),
605 fwd_proc_format_.samples_per_channel()); 610 fwd_proc_format_.samples_per_channel());
606 } 611 }
607 612
608 bool data_processed = is_data_processed(); 613 bool data_processed = is_data_processed();
609 if (analysis_needed(data_processed)) { 614 if (analysis_needed(data_processed)) {
610 ca->SplitIntoFrequencyBands(); 615 ca->SplitIntoFrequencyBands();
611 } 616 }
612 617
618 if (intelligibility_enabled_) {
619 intelligibility_enhancer_->AnalyzeCaptureAudio(
620 ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
621 }
622
613 if (beamformer_enabled_) { 623 if (beamformer_enabled_) {
614 beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); 624 beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
615 ca->set_num_channels(1); 625 ca->set_num_channels(1);
616 } 626 }
617 627
618 RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca)); 628 RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
619 RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca)); 629 RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
620 RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca)); 630 RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca));
621 RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca)); 631 RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
622 632
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
657 key_pressed_); 667 key_pressed_);
658 } 668 }
659 669
660 // The level estimator operates on the recombined data. 670 // The level estimator operates on the recombined data.
661 RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); 671 RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
662 672
663 was_stream_delay_set_ = false; 673 was_stream_delay_set_ = false;
664 return kNoError; 674 return kNoError;
665 } 675 }
666 676
677 int AudioProcessingImpl::ProcessReverseStream(float* const* data,
678 int samples_per_channel,
679 int rev_sample_rate_hz,
680 ChannelLayout layout) {
681 RETURN_ON_ERR(AnalyzeReverseStream(data, samples_per_channel,
682 rev_sample_rate_hz, layout));
683 if (intelligibility_enabled_) {
684 render_audio_->CopyTo(samples_per_channel, layout, data);
685 }
686
687 return kNoError;
688 }
689
667 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, 690 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
668 int samples_per_channel, 691 int samples_per_channel,
669 int sample_rate_hz, 692 int rev_sample_rate_hz,
670 ChannelLayout layout) { 693 ChannelLayout layout) {
671 CriticalSectionScoped crit_scoped(crit_); 694 CriticalSectionScoped crit_scoped(crit_);
672 if (data == NULL) { 695 if (data == NULL) {
673 return kNullPointerError; 696 return kNullPointerError;
674 } 697 }
675 698
676 const int num_channels = ChannelsFromLayout(layout); 699 const int num_channels = ChannelsFromLayout(layout);
677 RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), 700 RETURN_ON_ERR(
678 fwd_out_format_.rate(), 701 MaybeInitializeLocked(fwd_in_format_.rate(), fwd_out_format_.rate(),
679 sample_rate_hz, 702 rev_sample_rate_hz, fwd_in_format_.num_channels(),
680 fwd_in_format_.num_channels(), 703 fwd_out_format_.num_channels(), num_channels));
681 fwd_out_format_.num_channels(),
682 num_channels));
683 if (samples_per_channel != rev_in_format_.samples_per_channel()) { 704 if (samples_per_channel != rev_in_format_.samples_per_channel()) {
684 return kBadDataLengthError; 705 return kBadDataLengthError;
685 } 706 }
686 707
687 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 708 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
688 if (debug_file_->Open()) { 709 if (debug_file_->Open()) {
689 event_msg_->set_type(audioproc::Event::REVERSE_STREAM); 710 event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
690 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 711 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
691 const size_t channel_size = 712 const size_t channel_size =
692 sizeof(float) * rev_in_format_.samples_per_channel(); 713 sizeof(float) * rev_in_format_.samples_per_channel();
693 for (int i = 0; i < num_channels; ++i) 714 for (int i = 0; i < num_channels; ++i)
694 msg->add_channel(data[i], channel_size); 715 msg->add_channel(data[i], channel_size);
695 RETURN_ON_ERR(WriteMessageToDebugFile()); 716 RETURN_ON_ERR(WriteMessageToDebugFile());
696 } 717 }
697 #endif 718 #endif
698 719
699 render_audio_->CopyFrom(data, samples_per_channel, layout); 720 render_audio_->CopyFrom(data, samples_per_channel, layout);
700 return AnalyzeReverseStreamLocked(); 721 return ProcessReverseStreamLocked();
701 } 722 }
702 723
703 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { 724 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
Andrew MacDonald 2015/07/24 23:50:39 The AudioFrame* and float* AnalyzeReverseStreams h
ekm 2015/07/29 00:37:19 Acknowledged.
704 CriticalSectionScoped crit_scoped(crit_); 725 CriticalSectionScoped crit_scoped(crit_);
705 if (frame == NULL) { 726 if (frame == NULL) {
706 return kNullPointerError; 727 return kNullPointerError;
707 } 728 }
708 // Must be a native rate. 729 // Must be a native rate.
709 if (frame->sample_rate_hz_ != kSampleRate8kHz && 730 if (frame->sample_rate_hz_ != kSampleRate8kHz &&
710 frame->sample_rate_hz_ != kSampleRate16kHz && 731 frame->sample_rate_hz_ != kSampleRate16kHz &&
711 frame->sample_rate_hz_ != kSampleRate32kHz && 732 frame->sample_rate_hz_ != kSampleRate32kHz &&
712 frame->sample_rate_hz_ != kSampleRate48kHz) { 733 frame->sample_rate_hz_ != kSampleRate48kHz) {
713 return kBadSampleRateError; 734 return kBadSampleRateError;
714 } 735 }
715 // This interface does not tolerate different forward and reverse rates. 736 // This interface does not tolerate different forward and reverse rates.
716 if (frame->sample_rate_hz_ != fwd_in_format_.rate()) { 737 if (frame->sample_rate_hz_ != fwd_in_format_.rate()) {
717 return kBadSampleRateError; 738 return kBadSampleRateError;
718 } 739 }
719
720 RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), 740 RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
721 fwd_out_format_.rate(), 741 fwd_out_format_.rate(),
722 frame->sample_rate_hz_, 742 frame->sample_rate_hz_,
723 fwd_in_format_.num_channels(), 743 fwd_in_format_.num_channels(),
724 fwd_in_format_.num_channels(), 744 fwd_in_format_.num_channels(),
725 frame->num_channels_)); 745 frame->num_channels_));
726 if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) { 746 if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) {
727 return kBadDataLengthError; 747 return kBadDataLengthError;
728 } 748 }
729 749
730 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP 750 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
731 if (debug_file_->Open()) { 751 if (debug_file_->Open()) {
732 event_msg_->set_type(audioproc::Event::REVERSE_STREAM); 752 event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
733 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); 753 audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
734 const size_t data_size = sizeof(int16_t) * 754 const size_t data_size = sizeof(int16_t) *
735 frame->samples_per_channel_ * 755 frame->samples_per_channel_ *
736 frame->num_channels_; 756 frame->num_channels_;
737 msg->set_data(frame->data_, data_size); 757 msg->set_data(frame->data_, data_size);
738 RETURN_ON_ERR(WriteMessageToDebugFile()); 758 RETURN_ON_ERR(WriteMessageToDebugFile());
739 } 759 }
740 #endif 760 #endif
761 render_audio_->DeinterleaveFrom(frame);
762 RETURN_ON_ERR(ProcessReverseStreamLocked());
763 render_audio_->InterleaveTo(frame, intelligibility_enabled_);
Andrew MacDonald 2015/07/24 23:50:39 I think more clear if you add the check here: if (
ekm 2015/07/29 00:37:19 Done. Added is_rev_processed() to clean up these c
Andrew MacDonald 2015/07/29 03:52:27 Nice.
741 764
742 render_audio_->DeinterleaveFrom(frame); 765 return kNoError;
743 return AnalyzeReverseStreamLocked();
744 } 766 }
745 767
746 int AudioProcessingImpl::AnalyzeReverseStreamLocked() { 768 int AudioProcessingImpl::ProcessReverseStreamLocked() {
747 AudioBuffer* ra = render_audio_.get(); // For brevity. 769 AudioBuffer* ra = render_audio_.get(); // For brevity.
748 if (rev_proc_format_.rate() == kSampleRate32kHz) { 770 if (rev_proc_format_.rate() == kSampleRate32kHz) {
749 ra->SplitIntoFrequencyBands(); 771 ra->SplitIntoFrequencyBands();
750 } 772 }
751 773
774 if (intelligibility_enabled_) {
775 intelligibility_enhancer_->ProcessRenderAudio(
776 ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
777 }
778
752 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); 779 RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
753 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); 780 RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
754 if (!use_new_agc_) { 781 if (!use_new_agc_) {
755 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); 782 RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
756 } 783 }
757 784
785 if (rev_proc_format_.rate() == kSampleRate32kHz) {
Andrew MacDonald 2015/07/24 23:50:39 && intelligibility_enabled_
ekm 2015/07/29 00:37:19 Done.
786 ra->MergeFrequencyBands();
787 }
788
758 return kNoError; 789 return kNoError;
759 } 790 }
760 791
761 int AudioProcessingImpl::set_stream_delay_ms(int delay) { 792 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
762 Error retval = kNoError; 793 Error retval = kNoError;
763 was_stream_delay_set_ = true; 794 was_stream_delay_set_ = true;
764 delay += delay_offset_ms_; 795 delay += delay_offset_ms_;
765 796
766 if (delay < 0) { 797 if (delay < 0) {
767 delay = 0; 798 delay = 0;
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
994 1025
995 void AudioProcessingImpl::InitializeBeamformer() { 1026 void AudioProcessingImpl::InitializeBeamformer() {
996 if (beamformer_enabled_) { 1027 if (beamformer_enabled_) {
997 if (!beamformer_) { 1028 if (!beamformer_) {
998 beamformer_.reset(new NonlinearBeamformer(array_geometry_)); 1029 beamformer_.reset(new NonlinearBeamformer(array_geometry_));
999 } 1030 }
1000 beamformer_->Initialize(kChunkSizeMs, split_rate_); 1031 beamformer_->Initialize(kChunkSizeMs, split_rate_);
1001 } 1032 }
1002 } 1033 }
1003 1034
1035 void AudioProcessingImpl::InitializeIntelligibility() {
1036 if (intelligibility_enabled_) {
1037 IntelligibilityEnhancer::Config config;
1038 config.sample_rate_hz = split_rate_;
1039 config.num_capture_channels = capture_audio_->num_channels();
1040 config.num_render_channels = render_audio_->num_channels();
1041 intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
1042 }
1043 }
1044
1004 void AudioProcessingImpl::MaybeUpdateHistograms() { 1045 void AudioProcessingImpl::MaybeUpdateHistograms() {
1005 static const int kMinDiffDelayMs = 60; 1046 static const int kMinDiffDelayMs = 60;
1006 1047
1007 if (echo_cancellation()->is_enabled()) { 1048 if (echo_cancellation()->is_enabled()) {
1008 // Activate delay_jumps_ counters if we know echo_cancellation is running. 1049 // Activate delay_jumps_ counters if we know echo_cancellation is running.
1009 // If a stream has echo we know that the echo_cancellation is in process. 1050 // If a stream has echo we know that the echo_cancellation is in process.
1010 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) { 1051 if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) {
1011 stream_delay_jumps_ = 0; 1052 stream_delay_jumps_ = 0;
1012 } 1053 }
1013 if (aec_system_delay_jumps_ == -1 && 1054 if (aec_system_delay_jumps_ == -1 &&
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1106 int err = WriteMessageToDebugFile(); 1147 int err = WriteMessageToDebugFile();
1107 if (err != kNoError) { 1148 if (err != kNoError) {
1108 return err; 1149 return err;
1109 } 1150 }
1110 1151
1111 return kNoError; 1152 return kNoError;
1112 } 1153 }
1113 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP 1154 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1114 1155
1115 } // namespace webrtc 1156 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698