Chromium Code Reviews| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
| index fade1449ccdb3886af3dae5154fef3ee6f176bac..332fdb9360f0d3eb44859e13c0274a8d83b7e3c3 100644 |
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
| @@ -8,10 +8,6 @@ |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| -// |
| -// Specifies core class for intelligbility enhancement. |
| -// |
| - |
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ |
| #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ |
| @@ -28,30 +24,25 @@ namespace webrtc { |
| // Speech intelligibility enhancement module. Reads render and capture |
| // audio streams and modifies the render stream with a set of gains per |
| // frequency bin to enhance speech against the noise background. |
| -// Note: assumes speech and noise streams are already separated. |
| +// Details of the model and algorithm can be found in the original paper: |
| +// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 |
| class IntelligibilityEnhancer { |
| public: |
| struct Config { |
| - // |var_*| are parameters for the VarianceArray constructor for the |
| - // clear speech stream. |
| - // TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should |
| - // probably go away once fine tuning is done. |
| + // TODO(bercic): the |decay_rate|, |analysis_rate| and |gain_limit| |
| + // parameters should probably go away once fine tuning is done. |
| Config() |
| : sample_rate_hz(16000), |
| num_capture_channels(1), |
| num_render_channels(1), |
| - var_type(intelligibility::VarianceArray::kStepDecaying), |
| - var_decay_rate(0.9f), |
| - var_window_size(10), |
| - analysis_rate(800), |
| + decay_rate(0.9f), |
|
turaj
2016/02/12 23:12:35
Maybe give names to 0.9f and 60, i.e. define them as named constants.
aluebs-webrtc
2016/02/13 01:47:49
In the next CL I am deleting this Config and moving…
|
| + analysis_rate(60), |
| gain_change_limit(0.1f), |
| rho(0.02f) {} |
| int sample_rate_hz; |
| size_t num_capture_channels; |
| size_t num_render_channels; |
| - intelligibility::VarianceArray::StepType var_type; |
| - float var_decay_rate; |
| - size_t var_window_size; |
| + float decay_rate; |
| int analysis_rate; |
| float gain_change_limit; |
| float rho; |
| @@ -90,13 +81,13 @@ class IntelligibilityEnhancer { |
| FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); |
| FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); |
| - // Updates variance computation and analysis with |in_block_|, |
| + // Updates power computation and analysis with |in_block_|, |
| // and writes modified speech to |out_block|. |
| void ProcessClearBlock(const std::complex<float>* in_block, |
| std::complex<float>* out_block); |
| // Computes and sets modified gains. |
| - void AnalyzeClearBlock(float power_target); |
| + void AnalyzeClearBlock(); |
| // Bisection search for optimal |lambda|. |
| void SolveForLambda(float power_target, float power_bot, float power_top); |
| @@ -127,10 +118,10 @@ class IntelligibilityEnhancer { |
| const bool active_; // Whether render gains are being updated. |
| // TODO(ekm): Add logic for updating |active_|. |
| - intelligibility::VarianceArray clear_variance_; |
| + PowerEstimator clear_power_; |
| std::vector<float> noise_power_; |
| - rtc::scoped_ptr<float[]> filtered_clear_var_; |
| - rtc::scoped_ptr<float[]> filtered_noise_var_; |
| + rtc::scoped_ptr<float[]> filtered_clear_pow_; |
| + rtc::scoped_ptr<float[]> filtered_noise_pow_; |
| rtc::scoped_ptr<float[]> center_freqs_; |
| std::vector<std::vector<float>> capture_filter_bank_; |
| std::vector<std::vector<float>> render_filter_bank_; |
| @@ -138,7 +129,7 @@ class IntelligibilityEnhancer { |
| rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR. |
| // for each ERB band. |
| rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains. |
| - intelligibility::GainApplier gain_applier_; |
| + GainApplier gain_applier_; |
| // Destination buffers used to reassemble blocked chunks before overwriting |
| // the original input array with modifications. |