 Chromium Code Reviews
 Chromium Code Reviews Issue 1234463003:
  Integrate Intelligibility with APM  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master
    
  
    Issue 1234463003:
  Integrate Intelligibility with APM  (Closed) 
  Base URL: https://chromium.googlesource.com/external/webrtc.git@master| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h | 
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h | 
| index df47de597885ed61d9dd9a824d2c6505c1be99a4..674a2dd5b23dcc20d8ea471fdb59970601396a54 100644 | 
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h | 
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h | 
| @@ -33,32 +33,49 @@ namespace webrtc { | 
| // Note: assumes speech and noise streams are already separated. | 
| class IntelligibilityEnhancer { | 
| public: | 
| - // Construct a new instance with the given filter bank resolution, | 
| - // sampling rate, number of channels and analysis rates. | 
| - // |analysis_rate| sets the number of input blocks (containing speech!) | 
| - // to elapse before a new gain computation is made. |variance_rate| specifies | 
| - // the number of gain recomputations after which the variances are reset. | 
| - // |cv_*| are parameters for the VarianceArray constructor for the | 
| - // clear speech stream. | 
| - // TODO(bercic): the |cv_*|, |*_rate| and |gain_limit| parameters should | 
| - // probably go away once fine tuning is done. They override the internal | 
| - // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate). | 
| - IntelligibilityEnhancer(int erb_resolution, | 
| - int sample_rate_hz, | 
| - int channels, | 
| - int cv_type, | 
| - float cv_alpha, | 
| - int cv_win, | 
| - int analysis_rate, | 
| - int variance_rate, | 
| - float gain_limit); | 
| + struct Config { | 
| + // |var_*| are parameters for the VarianceArray constructor for the | 
| + // clear speech stream. | 
| + // TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should | 
| + // probably go away once fine tuning is done. They override the internal | 
| + // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate). | 
| + Config() | 
| + : sample_rate_hz(16000), | 
| + channels(1), | 
| + var_type(intelligibility::VarianceArray::kStepDecaying), | 
| + var_decay_rate(0.9f), | 
| + var_window_size(10), | 
| + analysis_rate(800), | 
| + gain_change_limit(0.1f), | 
| + rho(0.02f), | 
| + capture_vad_thresh(1.f), | 
| + render_vad_thresh(0.f) {} | 
| + int sample_rate_hz; | 
| + int channels; | 
| + intelligibility::VarianceArray::StepType var_type; | 
| + float var_decay_rate; | 
| + int var_window_size; | 
| + int analysis_rate; | 
| + float gain_change_limit; | 
| + float rho; | 
| + float capture_vad_thresh; | 
| + float render_vad_thresh; | 
| + }; | 
| + | 
| + explicit IntelligibilityEnhancer(const Config& config); | 
| + IntelligibilityEnhancer(); // Initialize with default config. | 
| + | 
| ~IntelligibilityEnhancer(); | 
| - // Reads and processes chunk of noise stream in time domain. | 
| - void ProcessCaptureAudio(float* const* audio); | 
| + // Reads and processes chunk of noise stream in time domain. Only updates | 
| + // noise estimate when |voice_probability| below a threshold. | 
| + void ProcessCaptureAudio(float* const* audio, const float voice_probability); | 
| 
aluebs-webrtc
2015/07/15 01:02:04
Does it actually process the capture audio or do
 
aluebs-webrtc
2015/07/15 01:02:05
In all of these methods you assume the sample rate
 
ekm
2015/07/17 19:59:38
Agreed. Similarly, in APM reverted AnalyzeReverseS
 
ekm
2015/07/17 19:59:38
Done.
 | 
| + void ProcessCaptureAudio(float* const* audio); // Assumes noise. | 
| 
aluebs-webrtc
2015/07/15 01:02:04
Do we want to surface both interfaces to the user?
 
ekm
2015/07/17 19:59:38
I think it's nice to give the user the option of u
 
aluebs-webrtc
2015/07/20 19:33:42
Agreed.
 | 
| // Reads chunk of speech in time domain and updates with modified signal. | 
| - void ProcessRenderAudio(float* const* audio); | 
| + // Only updates speech estimate when |voice_probability| above a threshold. | 
| + void ProcessRenderAudio(float* const* audio, const float voice_probability); | 
| + void ProcessRenderAudio(float* const* audio); // Assumes speech. | 
| 
aluebs-webrtc
2015/07/15 01:02:04
Do we want to surface both interfaces to the user?
 
ekm
2015/07/17 19:59:38
See above.
 | 
| private: | 
| enum AudioSource { | 
| @@ -135,7 +152,8 @@ class IntelligibilityEnhancer { | 
| const int erb_resolution_; | 
| const int channels_; // Num channels. | 
| const int analysis_rate_; // Num blocks before gains recalculated. | 
| - const int variance_rate_; // Num recalculations before history is cleared. | 
| + const float capture_vad_thresh_; // Threshold for updating noise estimate. | 
| + const float render_vad_thresh_; // Threshold for updating speech estimate. | 
| intelligibility::VarianceArray clear_variance_; | 
| intelligibility::VarianceArray noise_variance_; | 
| @@ -154,7 +172,6 @@ class IntelligibilityEnhancer { | 
| // TODO(ekmeyerson): Switch to using ChannelBuffer. | 
| float** temp_out_buffer_; | 
| - rtc::scoped_ptr<float* []> input_audio_; | 
| rtc::scoped_ptr<float[]> kbd_window_; | 
| TransformCallback render_callback_; | 
| TransformCallback capture_callback_; |