| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
 | 
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
 | 
| index dbb7e638b2d063c52e876ec53df090893e2d4d4c..8eccde452c359cd49c0a29096a1cfc9628b261a7 100644
 | 
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
 | 
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
 | 
| @@ -19,18 +19,18 @@
 | 
|  
 | 
|  #include <math.h>
 | 
|  #include <stdlib.h>
 | 
| -
 | 
|  #include <algorithm>
 | 
|  #include <numeric>
 | 
|  
 | 
|  #include "webrtc/base/checks.h"
 | 
| -#include "webrtc/common_audio/vad/include/webrtc_vad.h"
 | 
| +#include "webrtc/common_audio/include/audio_util.h"
 | 
|  #include "webrtc/common_audio/window_generator.h"
 | 
|  
 | 
|  namespace webrtc {
 | 
|  
 | 
|  namespace {
 | 
|  
 | 
| +const int kErbResolution = 2;
 | 
|  const int kWindowSizeMs = 2;
 | 
|  const int kChunkSizeMs = 10;  // Size provided by APM.
 | 
|  const float kClipFreq = 200.0f;
 | 
| @@ -64,124 +64,93 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
 | 
|    }
 | 
|  }
 | 
|  
 | 
| -IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution,
 | 
| -                                                 int sample_rate_hz,
 | 
| -                                                 int channels,
 | 
| -                                                 int cv_type,
 | 
| -                                                 float cv_alpha,
 | 
| -                                                 int cv_win,
 | 
| -                                                 int analysis_rate,
 | 
| -                                                 int variance_rate,
 | 
| -                                                 float gain_limit)
 | 
| +IntelligibilityEnhancer::IntelligibilityEnhancer()
 | 
| +    : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
 | 
| +}
 | 
| +
 | 
| +IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
 | 
|      : freqs_(RealFourier::ComplexLength(
 | 
| -          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
 | 
| +          RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
 | 
|        window_size_(1 << RealFourier::FftOrder(freqs_)),
 | 
| -      chunk_length_(sample_rate_hz * kChunkSizeMs / 1000),
 | 
| -      bank_size_(GetBankSize(sample_rate_hz, erb_resolution)),
 | 
| -      sample_rate_hz_(sample_rate_hz),
 | 
| -      erb_resolution_(erb_resolution),
 | 
| -      channels_(channels),
 | 
| -      analysis_rate_(analysis_rate),
 | 
| -      variance_rate_(variance_rate),
 | 
| +      chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000),
 | 
| +      bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
 | 
| +      sample_rate_hz_(config.sample_rate_hz),
 | 
| +      erb_resolution_(kErbResolution),
 | 
| +      num_capture_channels_(config.num_capture_channels),
 | 
| +      num_render_channels_(config.num_render_channels),
 | 
| +      analysis_rate_(config.analysis_rate),
 | 
| +      active_(true),
 | 
|        clear_variance_(freqs_,
 | 
| -                      static_cast<VarianceType>(cv_type),
 | 
| -                      cv_win,
 | 
| -                      cv_alpha),
 | 
| -      noise_variance_(freqs_, VarianceType::kStepInfinite, 475, 0.01f),
 | 
| +                      config.var_type,
 | 
| +                      config.var_window_size,
 | 
| +                      config.var_decay_rate),
 | 
| +      noise_variance_(freqs_,
 | 
| +                      config.var_type,
 | 
| +                      config.var_window_size,
 | 
| +                      config.var_decay_rate),
 | 
|        filtered_clear_var_(new float[bank_size_]),
 | 
|        filtered_noise_var_(new float[bank_size_]),
 | 
|        filter_bank_(bank_size_),
 | 
|        center_freqs_(new float[bank_size_]),
 | 
|        rho_(new float[bank_size_]),
 | 
|        gains_eq_(new float[bank_size_]),
 | 
| -      gain_applier_(freqs_, gain_limit),
 | 
| -      temp_out_buffer_(nullptr),
 | 
| -      input_audio_(new float* [channels]),
 | 
| +      gain_applier_(freqs_, config.gain_change_limit),
 | 
| +      temp_render_out_buffer_(chunk_length_, num_render_channels_),
 | 
| +      temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
 | 
|        kbd_window_(new float[window_size_]),
 | 
|        render_callback_(this, AudioSource::kRenderStream),
 | 
|        capture_callback_(this, AudioSource::kCaptureStream),
 | 
|        block_count_(0),
 | 
| -      analysis_step_(0),
 | 
| -      vad_high_(WebRtcVad_Create()),
 | 
| -      vad_low_(WebRtcVad_Create()),
 | 
| -      vad_tmp_buffer_(new int16_t[chunk_length_]) {
 | 
| -  DCHECK_LE(kConfigRho, 1.0f);
 | 
| +      analysis_step_(0) {
 | 
| +  DCHECK_LE(config.rho, 1.0f);
 | 
|  
 | 
|    CreateErbBank();
 | 
|  
 | 
| -  WebRtcVad_Init(vad_high_);
 | 
| -  WebRtcVad_set_mode(vad_high_, 0);  // High likelihood of speech.
 | 
| -  WebRtcVad_Init(vad_low_);
 | 
| -  WebRtcVad_set_mode(vad_low_, 3);  // Low likelihood of speech.
 | 
| -
 | 
| -  temp_out_buffer_ = static_cast<float**>(
 | 
| -      malloc(sizeof(*temp_out_buffer_) * channels_ +
 | 
| -             sizeof(**temp_out_buffer_) * chunk_length_ * channels_));
 | 
| -  for (int i = 0; i < channels_; ++i) {
 | 
| -    temp_out_buffer_[i] =
 | 
| -        reinterpret_cast<float*>(temp_out_buffer_ + channels_) +
 | 
| -        chunk_length_ * i;
 | 
| -  }
 | 
| -
 | 
|    // Assumes all rho equal.
 | 
|    for (int i = 0; i < bank_size_; ++i) {
 | 
| -    rho_[i] = kConfigRho * kConfigRho;
 | 
| +    rho_[i] = config.rho * config.rho;
 | 
|    }
 | 
|  
 | 
|    float freqs_khz = kClipFreq / 1000.0f;
 | 
|    int erb_index = static_cast<int>(ceilf(
 | 
|        11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
 | 
| -  start_freq_ = std::max(1, erb_index * erb_resolution);
 | 
| +  start_freq_ = max(1, erb_index * erb_resolution_);
 | 
|  
 | 
|    WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
 | 
|                                         kbd_window_.get());
 | 
|    render_mangler_.reset(new LappedTransform(
 | 
| -      channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
 | 
| -      window_size_ / 2, &render_callback_));
 | 
| +      num_render_channels_, num_render_channels_, chunk_length_,
 | 
| +      kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
 | 
|    capture_mangler_.reset(new LappedTransform(
 | 
| -      channels_, channels_, chunk_length_, kbd_window_.get(), window_size_,
 | 
| -      window_size_ / 2, &capture_callback_));
 | 
| +      num_capture_channels_, num_capture_channels_, chunk_length_,
 | 
| +      kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
 | 
|  }
 | 
|  
 | 
| -IntelligibilityEnhancer::~IntelligibilityEnhancer() {
 | 
| -  WebRtcVad_Free(vad_low_);
 | 
| -  WebRtcVad_Free(vad_high_);
 | 
| -  free(temp_out_buffer_);
 | 
| -}
 | 
| +void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
 | 
| +                                                 int sample_rate_hz,
 | 
| +                                                 int num_channels) {
 | 
| +  CHECK_EQ(sample_rate_hz_, sample_rate_hz);
 | 
| +  CHECK_EQ(num_render_channels_, num_channels);
 | 
|  
 | 
| -void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) {
 | 
| -  for (int i = 0; i < chunk_length_; ++i) {
 | 
| -    vad_tmp_buffer_[i] = (int16_t)audio[0][i];
 | 
| +  if (active_) {
 | 
| +    render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
 | 
|    }
 | 
| -  has_voice_low_ = WebRtcVad_Process(vad_low_, sample_rate_hz_,
 | 
| -                                     vad_tmp_buffer_.get(), chunk_length_) == 1;
 | 
|  
 | 
| -  // Process and enhance chunk of |audio|
 | 
| -  render_mangler_->ProcessChunk(audio, temp_out_buffer_);
 | 
| -
 | 
| -  for (int i = 0; i < channels_; ++i) {
 | 
| -    memcpy(audio[i], temp_out_buffer_[i],
 | 
| -           chunk_length_ * sizeof(**temp_out_buffer_));
 | 
| +  if (active_) {
 | 
| +    for (int i = 0; i < num_render_channels_; ++i) {
 | 
| +      memcpy(audio[i], temp_render_out_buffer_.channels()[i],
 | 
| +             chunk_length_ * sizeof(**audio));
 | 
| +    }
 | 
|    }
 | 
|  }
 | 
|  
 | 
| -void IntelligibilityEnhancer::ProcessCaptureAudio(float* const* audio) {
 | 
| -  for (int i = 0; i < chunk_length_; ++i) {
 | 
| -    vad_tmp_buffer_[i] = (int16_t)audio[0][i];
 | 
| -  }
 | 
| -  // TODO(bercic): The VAD was always detecting voice in the noise stream,
 | 
| -  // no matter what the aggressiveness, so it was temporarily disabled here.
 | 
| -
 | 
| -  #if 0
 | 
| -    if (WebRtcVad_Process(vad_high_, sample_rate_hz_, vad_tmp_buffer_.get(),
 | 
| -      chunk_length_) == 1) {
 | 
| -      printf("capture HAS speech\n");
 | 
| -      return;
 | 
| -    }
 | 
| -    printf("capture NO speech\n");
 | 
| -  #endif
 | 
| +void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio,
 | 
| +                                                  int sample_rate_hz,
 | 
| +                                                  int num_channels) {
 | 
| +  CHECK_EQ(sample_rate_hz_, sample_rate_hz);
 | 
| +  CHECK_EQ(num_capture_channels_, num_channels);
 | 
|  
 | 
| -  capture_mangler_->ProcessChunk(audio, temp_out_buffer_);
 | 
| +  capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels());
 | 
|  }
 | 
|  
 | 
|  void IntelligibilityEnhancer::DispatchAudio(
 | 
| @@ -206,28 +175,21 @@ void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
 | 
|      return;
 | 
|    }
 | 
|  
 | 
| -  // For now, always assumes enhancement is necessary.
 | 
| -  // TODO(ekmeyerson): Change to only enhance if necessary,
 | 
| -  // based on experiments with different cutoffs.
 | 
| -  if (has_voice_low_ || true) {
 | 
| +  // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
 | 
| +  if (true) {
 | 
|      clear_variance_.Step(in_block, false);
 | 
| -    const float power_target = std::accumulate(
 | 
| -        clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.0f);
 | 
| -
 | 
|      if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
 | 
| +      const float power_target = std::accumulate(
 | 
| +          clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f);
 | 
|        AnalyzeClearBlock(power_target);
 | 
|        ++analysis_step_;
 | 
| -      if (analysis_step_ == variance_rate_) {
 | 
| -        analysis_step_ = 0;
 | 
| -        clear_variance_.Clear();
 | 
| -        noise_variance_.Clear();
 | 
| -      }
 | 
|      }
 | 
|      ++block_count_;
 | 
|    }
 | 
|  
 | 
| -  /* efidata(n,:) = sqrt(b(n)) * fidata(n,:) */
 | 
| -  gain_applier_.Apply(in_block, out_block);
 | 
| +  if (active_) {
 | 
| +    gain_applier_.Apply(in_block, out_block);
 | 
| +  }
 | 
|  }
 | 
|  
 | 
|  void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
 | 
| @@ -406,4 +368,8 @@ float IntelligibilityEnhancer::DotProduct(const float* a,
 | 
|    return ret;
 | 
|  }
 | 
|  
 | 
| +bool IntelligibilityEnhancer::active() const {
 | 
| +  return active_;
 | 
| +}
 | 
| +
 | 
|  }  // namespace webrtc
 | 
| 
 |