| OLD | NEW | 
|    1 /* |    1 /* | 
|    2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |    2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 
|    3  * |    3  * | 
|    4  *  Use of this source code is governed by a BSD-style license |    4  *  Use of this source code is governed by a BSD-style license | 
|    5  *  that can be found in the LICENSE file in the root of the source |    5  *  that can be found in the LICENSE file in the root of the source | 
|    6  *  tree. An additional intellectual property rights grant can be found |    6  *  tree. An additional intellectual property rights grant can be found | 
|    7  *  in the file PATENTS.  All contributing project authors may |    7  *  in the file PATENTS.  All contributing project authors may | 
|    8  *  be found in the AUTHORS file in the root of the source tree. |    8  *  be found in the AUTHORS file in the root of the source tree. | 
|    9  */ |    9  */ | 
|   10  |   10  | 
|   11 // |  | 
|   12 //  Specifies core class for intelligibility enhancement. |  | 
|   13 // |  | 
|   14  |  | 
|   15 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER
     _H_ |   11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER
     _H_ | 
|   16 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER
     _H_ |   12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER
     _H_ | 
|   17  |   13  | 
|   18 #include <complex> |   14 #include <complex> | 
|   19 #include <vector> |   15 #include <vector> | 
|   20  |   16  | 
|   21 #include "webrtc/base/scoped_ptr.h" |   17 #include "webrtc/base/scoped_ptr.h" | 
|   22 #include "webrtc/common_audio/lapped_transform.h" |   18 #include "webrtc/common_audio/lapped_transform.h" | 
|   23 #include "webrtc/common_audio/channel_buffer.h" |   19 #include "webrtc/common_audio/channel_buffer.h" | 
|   24 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.
     h" |   20 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.
     h" | 
|   25  |   21  | 
|   26 namespace webrtc { |   22 namespace webrtc { | 
|   27  |   23  | 
|   28 // Speech intelligibility enhancement module. Reads render and capture |   24 // Speech intelligibility enhancement module. Reads render and capture | 
|   29 // audio streams and modifies the render stream with a set of gains per |   25 // audio streams and modifies the render stream with a set of gains per | 
|   30 // frequency bin to enhance speech against the noise background. |   26 // frequency bin to enhance speech against the noise background. | 
|   31 // Note: assumes speech and noise streams are already separated. |   27 // Details of the model and algorithm can be found in the original paper: | 
 |   28 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 | 
|   32 class IntelligibilityEnhancer { |   29 class IntelligibilityEnhancer { | 
|   33  public: |   30  public: | 
|   34   struct Config { |   31   struct Config { | 
|   35     // |var_*| are parameters for the VarianceArray constructor for the |   32     // TODO(bercic): the |decay_rate|, |analysis_rate| and |gain_change_limit| | 
|   36     // clear speech stream. |   33     // parameters should probably go away once fine tuning is done. | 
|   37     // TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should |  | 
|   38     // probably go away once fine tuning is done. |  | 
|   39     Config() |   34     Config() | 
|   40         : sample_rate_hz(16000), |   35         : sample_rate_hz(16000), | 
|   41           num_capture_channels(1), |   36           num_capture_channels(1), | 
|   42           num_render_channels(1), |   37           num_render_channels(1), | 
|   43           var_type(intelligibility::VarianceArray::kStepDecaying), |   38           decay_rate(0.9f), | 
|   44           var_decay_rate(0.9f), |   39           analysis_rate(60), | 
|   45           var_window_size(10), |  | 
|   46           analysis_rate(800), |  | 
|   47           gain_change_limit(0.1f), |   40           gain_change_limit(0.1f), | 
|   48           rho(0.02f) {} |   41           rho(0.02f) {} | 
|   49     int sample_rate_hz; |   42     int sample_rate_hz; | 
|   50     size_t num_capture_channels; |   43     size_t num_capture_channels; | 
|   51     size_t num_render_channels; |   44     size_t num_render_channels; | 
|   52     intelligibility::VarianceArray::StepType var_type; |   45     float decay_rate; | 
|   53     float var_decay_rate; |  | 
|   54     size_t var_window_size; |  | 
|   55     int analysis_rate; |   46     int analysis_rate; | 
|   56     float gain_change_limit; |   47     float gain_change_limit; | 
|   57     float rho; |   48     float rho; | 
|   58   }; |   49   }; | 
|   59  |   50  | 
|   60   explicit IntelligibilityEnhancer(const Config& config); |   51   explicit IntelligibilityEnhancer(const Config& config); | 
|   61   IntelligibilityEnhancer();  // Initialize with default config. |   52   IntelligibilityEnhancer();  // Initialize with default config. | 
|   62  |   53  | 
|   63   // Sets the capture noise magnitude spectrum estimate. |   54   // Sets the capture noise magnitude spectrum estimate. | 
|   64   void SetCaptureNoiseEstimate(std::vector<float> noise); |   55   void SetCaptureNoiseEstimate(std::vector<float> noise); | 
| (...skipping 18 matching lines...) Expand all  Loading... | 
|   83                            size_t out_channels, |   74                            size_t out_channels, | 
|   84                            std::complex<float>* const* out_block) override; |   75                            std::complex<float>* const* out_block) override; | 
|   85  |   76  | 
|   86    private: |   77    private: | 
|   87     IntelligibilityEnhancer* parent_; |   78     IntelligibilityEnhancer* parent_; | 
|   88   }; |   79   }; | 
|   89   friend class TransformCallback; |   80   friend class TransformCallback; | 
|   90   FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); |   81   FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); | 
|   91   FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); |   82   FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); | 
|   92  |   83  | 
|   93   // Updates variance computation and analysis with |in_block_|, |   84   // Updates power computation and analysis with |in_block_|, | 
|   94   // and writes modified speech to |out_block|. |   85   // and writes modified speech to |out_block|. | 
|   95   void ProcessClearBlock(const std::complex<float>* in_block, |   86   void ProcessClearBlock(const std::complex<float>* in_block, | 
|   96                          std::complex<float>* out_block); |   87                          std::complex<float>* out_block); | 
|   97  |   88  | 
|   98   // Computes and sets modified gains. |   89   // Computes and sets modified gains. | 
|   99   void AnalyzeClearBlock(float power_target); |   90   void AnalyzeClearBlock(); | 
|  100  |   91  | 
|  101   // Bisection search for optimal |lambda|. |   92   // Bisection search for optimal |lambda|. | 
|  102   void SolveForLambda(float power_target, float power_bot, float power_top); |   93   void SolveForLambda(float power_target, float power_bot, float power_top); | 
|  103  |   94  | 
|  104   // Transforms freq gains to ERB gains. |   95   // Transforms freq gains to ERB gains. | 
|  105   void UpdateErbGains(); |   96   void UpdateErbGains(); | 
|  106  |   97  | 
|  107   // Returns number of ERB filters. |   98   // Returns number of ERB filters. | 
|  108   static size_t GetBankSize(int sample_rate, size_t erb_resolution); |   99   static size_t GetBankSize(int sample_rate, size_t erb_resolution); | 
|  109  |  100  | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
|  120   const size_t bank_size_;     // Num ERB filters. |  111   const size_t bank_size_;     // Num ERB filters. | 
|  121   const int sample_rate_hz_; |  112   const int sample_rate_hz_; | 
|  122   const int erb_resolution_; |  113   const int erb_resolution_; | 
|  123   const size_t num_capture_channels_; |  114   const size_t num_capture_channels_; | 
|  124   const size_t num_render_channels_; |  115   const size_t num_render_channels_; | 
|  125   const int analysis_rate_;    // Num blocks before gains recalculated. |  116   const int analysis_rate_;    // Num blocks before gains recalculated. | 
|  126  |  117  | 
|  127   const bool active_;          // Whether render gains are being updated. |  118   const bool active_;          // Whether render gains are being updated. | 
|  128                                // TODO(ekm): Add logic for updating |active_|. |  119                                // TODO(ekm): Add logic for updating |active_|. | 
|  129  |  120  | 
|  130   intelligibility::VarianceArray clear_variance_; |  121   PowerEstimator clear_power_; | 
|  131   std::vector<float> noise_power_; |  122   std::vector<float> noise_power_; | 
|  132   rtc::scoped_ptr<float[]> filtered_clear_var_; |  123   rtc::scoped_ptr<float[]> filtered_clear_pow_; | 
|  133   rtc::scoped_ptr<float[]> filtered_noise_var_; |  124   rtc::scoped_ptr<float[]> filtered_noise_pow_; | 
|  134   rtc::scoped_ptr<float[]> center_freqs_; |  125   rtc::scoped_ptr<float[]> center_freqs_; | 
|  135   std::vector<std::vector<float>> capture_filter_bank_; |  126   std::vector<std::vector<float>> capture_filter_bank_; | 
|  136   std::vector<std::vector<float>> render_filter_bank_; |  127   std::vector<std::vector<float>> render_filter_bank_; | 
|  137   size_t start_freq_; |  128   size_t start_freq_; | 
|  138   rtc::scoped_ptr<float[]> rho_;  // Production and interpretation SNR. |  129   rtc::scoped_ptr<float[]> rho_;  // Production and interpretation SNR. | 
|  139                                   // for each ERB band. |  130                                   // for each ERB band. | 
|  140   rtc::scoped_ptr<float[]> gains_eq_;  // Pre-filter modified gains. |  131   rtc::scoped_ptr<float[]> gains_eq_;  // Pre-filter modified gains. | 
|  141   intelligibility::GainApplier gain_applier_; |  132   GainApplier gain_applier_; | 
|  142  |  133  | 
|  143   // Destination buffers used to reassemble blocked chunks before overwriting |  134   // Destination buffers used to reassemble blocked chunks before overwriting | 
|  144   // the original input array with modifications. |  135   // the original input array with modifications. | 
|  145   ChannelBuffer<float> temp_render_out_buffer_; |  136   ChannelBuffer<float> temp_render_out_buffer_; | 
|  146  |  137  | 
|  147   rtc::scoped_ptr<float[]> kbd_window_; |  138   rtc::scoped_ptr<float[]> kbd_window_; | 
|  148   TransformCallback render_callback_; |  139   TransformCallback render_callback_; | 
|  149   rtc::scoped_ptr<LappedTransform> render_mangler_; |  140   rtc::scoped_ptr<LappedTransform> render_mangler_; | 
|  150   int block_count_; |  141   int block_count_; | 
|  151   int analysis_step_; |  142   int analysis_step_; | 
|  152 }; |  143 }; | 
|  153  |  144  | 
|  154 }  // namespace webrtc |  145 }  // namespace webrtc | 
|  155  |  146  | 
|  156 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHAN
     CER_H_ |  147 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHAN
     CER_H_ | 
| OLD | NEW |