| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
 | 
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
 | 
| index df47de597885ed61d9dd9a824d2c6505c1be99a4..8909b02c457b4e68f05bc55fbb394d38d2ea7f10 100644
 | 
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
 | 
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
 | 
| @@ -20,11 +20,9 @@
 | 
|  
 | 
|  #include "webrtc/base/scoped_ptr.h"
 | 
|  #include "webrtc/common_audio/lapped_transform.h"
 | 
| +#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
 | 
|  #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
 | 
|  
 | 
| -struct WebRtcVadInst;
 | 
| -typedef struct WebRtcVadInst VadInst;
 | 
| -
 | 
|  namespace webrtc {
 | 
|  
 | 
|  // Speech intelligibility enhancement module. Reads render and capture
 | 
| @@ -33,32 +31,66 @@ namespace webrtc {
 | 
|  // Note: assumes speech and noise streams are already separated.
 | 
|  class IntelligibilityEnhancer {
 | 
|   public:
 | 
| -  // Construct a new instance with the given filter bank resolution,
 | 
| -  // sampling rate, number of channels and analysis rates.
 | 
| -  // |analysis_rate| sets the number of input blocks (containing speech!)
 | 
| -  // to elapse before a new gain computation is made. |variance_rate| specifies
 | 
| -  // the number of gain recomputations after which the variances are reset.
 | 
| -  // |cv_*| are parameters for the VarianceArray constructor for the
 | 
| -  // clear speech stream.
 | 
| -  // TODO(bercic): the |cv_*|, |*_rate| and |gain_limit| parameters should
 | 
| -  // probably go away once fine tuning is done. They override the internal
 | 
| -  // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).
 | 
| -  IntelligibilityEnhancer(int erb_resolution,
 | 
| -                          int sample_rate_hz,
 | 
| -                          int channels,
 | 
| -                          int cv_type,
 | 
| -                          float cv_alpha,
 | 
| -                          int cv_win,
 | 
| -                          int analysis_rate,
 | 
| -                          int variance_rate,
 | 
| -                          float gain_limit);
 | 
| +  struct Config {
 | 
| +    // |var_*| are parameters for the VarianceArray constructor for the
 | 
| +    // clear speech stream.
 | 
| +    // TODO(bercic): |var_*|, |*_rate| and |gain_change_limit| parameters should
 | 
| +    // probably go away once fine tuning is done.
 | 
| +    Config()
 | 
| +        : sample_rate_hz(16000),
 | 
| +          num_capture_channels(1),
 | 
| +          num_render_channels(1),
 | 
| +          var_type(intelligibility::VarianceArray::kStepDecaying),
 | 
| +          var_decay_rate(0.9f),
 | 
| +          var_window_size(10),
 | 
| +          analysis_rate(800),
 | 
| +          gain_change_limit(0.1f),
 | 
| +          rho(0.02f),
 | 
| +          capture_vad_thresh(1.f),
 | 
| +          render_vad_thresh(0.f),
 | 
| +          activate_snr_thresh(0.f),
 | 
| +          deactivate_snr_thresh(100000.f) {}
 | 
| +    int sample_rate_hz;
 | 
| +    int num_capture_channels;
 | 
| +    int num_render_channels;
 | 
| +    intelligibility::VarianceArray::StepType var_type;
 | 
| +    float var_decay_rate;
 | 
| +    int var_window_size;
 | 
| +    int analysis_rate;
 | 
| +    float gain_change_limit;
 | 
| +    float rho;
 | 
| +    float capture_vad_thresh;
 | 
| +    float render_vad_thresh;
 | 
| +    float activate_snr_thresh;
 | 
| +    float deactivate_snr_thresh;
 | 
| +  };
 | 
| +
 | 
| +  explicit IntelligibilityEnhancer(const Config& config);
 | 
| +  IntelligibilityEnhancer();  // Initialize with default config.
 | 
| +
 | 
|    ~IntelligibilityEnhancer();
 | 
|  
 | 
| -  // Reads and processes chunk of noise stream in time domain.
 | 
| -  void ProcessCaptureAudio(float* const* audio);
 | 
| +  // Reads and processes chunk of noise stream in time domain. Only updates
 | 
| +  // noise estimate when |voice_probability| is below a threshold. Uses internal
 | 
| +  // VAD when |voice_probability| is not provided.
 | 
| +  void AnalyzeCaptureAudio(float* const* audio,
 | 
| +                           int sample_rate_hz,
 | 
| +                           int num_channels,
 | 
| +                           float voice_probability);
 | 
| +  void AnalyzeCaptureAudio(float* const* audio,
 | 
| +                           int sample_rate_hz,
 | 
| +                           int num_channels);
 | 
|  
 | 
|    // Reads chunk of speech in time domain and updates with modified signal.
 | 
| -  void ProcessRenderAudio(float* const* audio);
 | 
| +  // Only updates speech estimate when |voice_probability| is above a threshold.
 | 
| +  // Uses internal VAD when |voice_probability| is not provided.
 | 
| +  void ProcessRenderAudio(float* const* audio,
 | 
| +                          int sample_rate_hz,
 | 
| +                          int num_channels,
 | 
| +                          float voice_probability);
 | 
| +  void ProcessRenderAudio(float* const* audio,
 | 
| +                          int sample_rate_hz,
 | 
| +                          int num_channels);
 | 
|  
 | 
|   private:
 | 
|    enum AudioSource {
 | 
| @@ -124,6 +156,12 @@ class IntelligibilityEnhancer {
 | 
|    // Stores in |result|.
 | 
|    void FilterVariance(const float* var, float* result);
 | 
|  
 | 
| +  // Returns the ratio of total clear (speech) variance to total noise variance.
 | 
| +  float SNR();
 | 
| +
 | 
| +  // Updates |active_| based on SNR.
 | 
| +  void UpdateActivity();
 | 
| +
 | 
|    // Returns dot product of vectors specified by size |length| arrays |a|,|b|.
 | 
|    static float DotProduct(const float* a, const float* b, int length);
 | 
|  
 | 
| @@ -133,9 +171,16 @@ class IntelligibilityEnhancer {
 | 
|    const int bank_size_;     // Num ERB filters.
 | 
|    const int sample_rate_hz_;
 | 
|    const int erb_resolution_;
 | 
| -  const int channels_;       // Num channels.
 | 
| +  const int num_capture_channels_;
 | 
| +  const int num_render_channels_;
 | 
|    const int analysis_rate_;  // Num blocks before gains recalculated.
 | 
| -  const int variance_rate_;  // Num recalculations before history is cleared.
 | 
| +  const float capture_vad_thresh_;   // Threshold for updating noise estimate.
 | 
| +  const float render_vad_thresh_;    // Threshold for updating speech estimate.
 | 
| +  const float activate_snr_thresh_;  // Threshold for activating gain updates.
 | 
| +  const float deactivate_snr_thresh_;  // Threshold for deactivating.
 | 
| +
 | 
| +  bool active_;        // Whether render gains are being updated.
 | 
| +  bool deactivating_;  // True when we are smoothing enhancer off.
 | 
|  
 | 
|    intelligibility::VarianceArray clear_variance_;
 | 
|    intelligibility::VarianceArray noise_variance_;
 | 
| @@ -149,12 +194,12 @@ class IntelligibilityEnhancer {
 | 
|    rtc::scoped_ptr<float[]> gains_eq_;  // Pre-filter modified gains.
 | 
|    intelligibility::GainApplier gain_applier_;
 | 
|  
 | 
| -  // Destination buffer used to reassemble blocked chunks before overwriting
 | 
| +  // Destination buffers used to reassemble blocked chunks before overwriting
 | 
|    // the original input array with modifications.
 | 
|    // TODO(ekmeyerson): Switch to using ChannelBuffer.
 | 
| -  float** temp_out_buffer_;
 | 
| +  float** temp_render_out_buffer_;
 | 
| +  float** temp_capture_out_buffer_;
 | 
|  
 | 
| -  rtc::scoped_ptr<float* []> input_audio_;
 | 
|    rtc::scoped_ptr<float[]> kbd_window_;
 | 
|    TransformCallback render_callback_;
 | 
|    TransformCallback capture_callback_;
 | 
| @@ -163,13 +208,13 @@ class IntelligibilityEnhancer {
 | 
|    int block_count_;
 | 
|    int analysis_step_;
 | 
|  
 | 
| -  // TODO(bercic): Quick stopgap measure for voice detection in the clear
 | 
| -  // and noise streams.
 | 
| -  // Note: VAD currently does not affect anything in IntelligibilityEnhancer.
 | 
| -  VadInst* vad_high_;
 | 
| -  VadInst* vad_low_;
 | 
| +  VoiceActivityDetector capture_vad_;
 | 
| +  VoiceActivityDetector render_vad_;
 | 
| +  float capture_voice_probability_;
 | 
| +  float render_voice_probability_;
 | 
| +  bool using_capture_vad_;
 | 
| +  bool using_render_vad_;
 | 
|    rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;
 | 
| -  bool has_voice_low_;  // Whether voice detected in speech stream.
 | 
|  };
 | 
|  
 | 
|  }  // namespace webrtc
 | 
| 
 |