| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc | 
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc | 
| index dbb7e638b2d063c52e876ec53df090893e2d4d4c..8eccde452c359cd49c0a29096a1cfc9628b261a7 100644 | 
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc | 
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc | 
| @@ -19,18 +19,18 @@ | 
|  | 
| #include <math.h> | 
| #include <stdlib.h> | 
| - | 
| #include <algorithm> | 
| #include <numeric> | 
|  | 
| #include "webrtc/base/checks.h" | 
| -#include "webrtc/common_audio/vad/include/webrtc_vad.h" | 
| +#include "webrtc/common_audio/include/audio_util.h" | 
| #include "webrtc/common_audio/window_generator.h" | 
|  | 
| namespace webrtc { | 
|  | 
| namespace { | 
|  | 
| +const int kErbResolution = 2; | 
| const int kWindowSizeMs = 2; | 
| const int kChunkSizeMs = 10;  // Size provided by APM. | 
| const float kClipFreq = 200.0f; | 
| @@ -64,124 +64,93 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( | 
| } | 
| } | 
|  | 
| -IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution, | 
| -                                                 int sample_rate_hz, | 
| -                                                 int channels, | 
| -                                                 int cv_type, | 
| -                                                 float cv_alpha, | 
| -                                                 int cv_win, | 
| -                                                 int analysis_rate, | 
| -                                                 int variance_rate, | 
| -                                                 float gain_limit) | 
| +IntelligibilityEnhancer::IntelligibilityEnhancer() | 
| +    : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) { | 
| +} | 
| + | 
| +IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) | 
| : freqs_(RealFourier::ComplexLength( | 
| -          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), | 
| +          RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))), | 
| window_size_(1 << RealFourier::FftOrder(freqs_)), | 
| -      chunk_length_(sample_rate_hz * kChunkSizeMs / 1000), | 
| -      bank_size_(GetBankSize(sample_rate_hz, erb_resolution)), | 
| -      sample_rate_hz_(sample_rate_hz), | 
| -      erb_resolution_(erb_resolution), | 
| -      channels_(channels), | 
| -      analysis_rate_(analysis_rate), | 
| -      variance_rate_(variance_rate), | 
| +      chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000), | 
| +      bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)), | 
| +      sample_rate_hz_(config.sample_rate_hz), | 
| +      erb_resolution_(kErbResolution), | 
| +      num_capture_channels_(config.num_capture_channels), | 
| +      num_render_channels_(config.num_render_channels), | 
| +      analysis_rate_(config.analysis_rate), | 
| +      active_(true), | 
| clear_variance_(freqs_, | 
| -                      static_cast<VarianceType>(cv_type), | 
| -                      cv_win, | 
| -                      cv_alpha), | 
| -      noise_variance_(freqs_, VarianceType::kStepInfinite, 475, 0.01f), | 
| +                      config.var_type, | 
| +                      config.var_window_size, | 
| +                      config.var_decay_rate), | 
| +      noise_variance_(freqs_, | 
| +                      config.var_type, | 
| +                      config.var_window_size, | 
| +                      config.var_decay_rate), | 
| filtered_clear_var_(new float[bank_size_]), | 
| filtered_noise_var_(new float[bank_size_]), | 
| filter_bank_(bank_size_), | 
| center_freqs_(new float[bank_size_]), | 
| rho_(new float[bank_size_]), | 
| gains_eq_(new float[bank_size_]), | 
| -      gain_applier_(freqs_, gain_limit), | 
| -      temp_out_buffer_(nullptr), | 
| -      input_audio_(new float* [channels]), | 
| +      gain_applier_(freqs_, config.gain_change_limit), | 
| +      temp_render_out_buffer_(chunk_length_, num_render_channels_), | 
| +      temp_capture_out_buffer_(chunk_length_, num_capture_channels_), | 
| kbd_window_(new float[window_size_]), | 
| render_callback_(this, AudioSource::kRenderStream), | 
| capture_callback_(this, AudioSource::kCaptureStream), | 
| block_count_(0), | 
| -      analysis_step_(0), | 
| -      vad_high_(WebRtcVad_Create()), | 
| -      vad_low_(WebRtcVad_Create()), | 
| -      vad_tmp_buffer_(new int16_t[chunk_length_]) { | 
| -  DCHECK_LE(kConfigRho, 1.0f); | 
| +      analysis_step_(0) { | 
| +  DCHECK_LE(config.rho, 1.0f); | 
|  | 
| CreateErbBank(); | 
|  | 
| -  WebRtcVad_Init(vad_high_); | 
| -  WebRtcVad_set_mode(vad_high_, 0);  // High likelihood of speech. | 
| -  WebRtcVad_Init(vad_low_); | 
| -  WebRtcVad_set_mode(vad_low_, 3);  // Low likelihood of speech. | 
| - | 
| -  temp_out_buffer_ = static_cast<float**>( | 
| -      malloc(sizeof(*temp_out_buffer_) * channels_ + | 
| -             sizeof(**temp_out_buffer_) * chunk_length_ * channels_)); | 
| -  for (int i = 0; i < channels_; ++i) { | 
| -    temp_out_buffer_[i] = | 
| -        reinterpret_cast<float*>(temp_out_buffer_ + channels_) + | 
| -        chunk_length_ * i; | 
| -  } | 
| - | 
| // Assumes all rho equal. | 
| for (int i = 0; i < bank_size_; ++i) { | 
| -    rho_[i] = kConfigRho * kConfigRho; | 
| +    rho_[i] = config.rho * config.rho; | 
| } | 
|  | 
| float freqs_khz = kClipFreq / 1000.0f; | 
| int erb_index = static_cast<int>(ceilf( | 
| 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); | 
| -  start_freq_ = std::max(1, erb_index * erb_resolution); | 
| +  start_freq_ = max(1, erb_index * erb_resolution_); | 
|  | 
| WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, | 
| kbd_window_.get()); | 
| render_mangler_.reset(new LappedTransform( | 
| -      channels_, channels_, chunk_length_, kbd_window_.get(), window_size_, | 
| -      window_size_ / 2, &render_callback_)); | 
| +      num_render_channels_, num_render_channels_, chunk_length_, | 
| +      kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); | 
| capture_mangler_.reset(new LappedTransform( | 
| -      channels_, channels_, chunk_length_, kbd_window_.get(), window_size_, | 
| -      window_size_ / 2, &capture_callback_)); | 
| +      num_capture_channels_, num_capture_channels_, chunk_length_, | 
| +      kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); | 
| } | 
|  | 
| -IntelligibilityEnhancer::~IntelligibilityEnhancer() { | 
| -  WebRtcVad_Free(vad_low_); | 
| -  WebRtcVad_Free(vad_high_); | 
| -  free(temp_out_buffer_); | 
| -} | 
| +void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, | 
| +                                                 int sample_rate_hz, | 
| +                                                 int num_channels) { | 
| +  CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 
| +  CHECK_EQ(num_render_channels_, num_channels); | 
|  | 
| -void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) { | 
| -  for (int i = 0; i < chunk_length_; ++i) { | 
| -    vad_tmp_buffer_[i] = (int16_t)audio[0][i]; | 
| +  if (active_) { | 
| +    render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels()); | 
| } | 
| -  has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_, | 
| -                                     vad_tmp_buffer_.get(), chunk_length_) == 1; | 
|  | 
| -  // Process and enhance chunk of |audio| | 
| -  render_mangler_->ProcessChunk(audio, temp_out_buffer_); | 
| - | 
| -  for (int i = 0; i < channels_; ++i) { | 
| -    memcpy(audio[i], temp_out_buffer_[i], | 
| -           chunk_length_ * sizeof(**temp_out_buffer_)); | 
| +  if (active_) { | 
| +    for (int i = 0; i < num_render_channels_; ++i) { | 
| +      memcpy(audio[i], temp_render_out_buffer_.channels()[i], | 
| +             chunk_length_ * sizeof(**audio)); | 
| +    } | 
| } | 
| } | 
|  | 
| -void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) { | 
| -  for (int i = 0; i < chunk_length_; ++i) { | 
| -    vad_tmp_buffer_[i] = (int16_t)audio[0][i]; | 
| -  } | 
| -  // TODO(bercic): The VAD was always detecting voice in the noise stream, | 
| -  // no matter what the aggressiveness, so it was temporarily disabled here. | 
| - | 
| -  #if 0 | 
| -    if (WebRtcVad_Process(vad_high_, sample_rate_hz_, vad_tmp_buffer_.get(), | 
| -      chunk_length_) == 1) { | 
| -      printf("capture HAS speech\n"); | 
| -      return; | 
| -    } | 
| -    printf("capture NO speech\n"); | 
| -  #endif | 
| +void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, | 
| +                                                  int sample_rate_hz, | 
| +                                                  int num_channels) { | 
| +  CHECK_EQ(sample_rate_hz_, sample_rate_hz); | 
| +  CHECK_EQ(num_capture_channels_, num_channels); | 
|  | 
| -  capture_mangler_->ProcessChunk(audio, temp_out_buffer_); | 
| +  capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels()); | 
| } | 
|  | 
| void IntelligibilityEnhancer::DispatchAudio( | 
| @@ -206,28 +175,21 @@ void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, | 
| return; | 
| } | 
|  | 
| -  // For now, always assumes enhancement is necessary. | 
| -  // TODO(ekmeyerson): Change to only enhance if necessary, | 
| -  // based on experiments with different cutoffs. | 
| -  if (has_voice_low_ || true) { | 
| +  // TODO(ekm): Use VAD so |Step| and |AnalyzeClearBlock| run only if necessary. | 
| +  if (true) { | 
| clear_variance_.Step(in_block, false); | 
| -    const float power_target = std::accumulate( | 
| -        clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.0f); | 
| - | 
| if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { | 
| +      const float power_target = std::accumulate( | 
| +          clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f); | 
| AnalyzeClearBlock(power_target); | 
| ++analysis_step_; | 
| -      if (analysis_step_ == variance_rate_) { | 
| -        analysis_step_ = 0; | 
| -        clear_variance_.Clear(); | 
| -        noise_variance_.Clear(); | 
| -      } | 
| } | 
| ++block_count_; | 
| } | 
|  | 
| -  /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */ | 
| -  gain_applier_.Apply(in_block, out_block); | 
| +  if (active_) { | 
| +    gain_applier_.Apply(in_block, out_block); | 
| +  } | 
| } | 
|  | 
| void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { | 
| @@ -406,4 +368,8 @@ float IntelligibilityEnhancer::DotProduct(const float* a, | 
| return ret; | 
| } | 
|  | 
| +bool IntelligibilityEnhancer::active() const { | 
| +  return active_; | 
| +} | 
| + | 
| }  // namespace webrtc | 
|  |