| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 /* | 
|  | 2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 
|  | 3  * | 
|  | 4  *  Use of this source code is governed by a BSD-style license | 
|  | 5  *  that can be found in the LICENSE file in the root of the source | 
|  | 6  *  tree. An additional intellectual property rights grant can be found | 
|  | 7  *  in the file PATENTS.  All contributing project authors may | 
|  | 8  *  be found in the AUTHORS file in the root of the source tree. | 
|  | 9  */ | 
|  | 10 | 
|  | 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 
|  | 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 
|  | 13 | 
|  | 14 #include <complex> | 
|  | 15 | 
|  | 16 #include "webrtc/common_audio/lapped_transform.h" | 
|  | 17 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" | 
|  | 18 #include "webrtc/system_wrappers/interface/scoped_ptr.h" | 
|  | 19 | 
|  | 20 struct WebRtcVadInst; | 
|  | 21 typedef struct WebRtcVadInst VadInst; | 
|  | 22 | 
|  | 23 namespace webrtc { | 
|  | 24 | 
|  | 25 // Speech intelligibility enhancement module. Reads render and capture | 
|  | 26 // audio streams and modifies the render stream with a set of gains per | 
|  | 27 // frequency bin to enhance speech against the noise background. | 
|  | 28 class IntelligibilityEnhancer { | 
|  | 29  public: | 
|  | 30   // Construct a new instance with the given filter bank resolution, | 
|  | 31   // sampling rate, number of channels and analysis rates. | 
|  | 32   // |analysis_rate| sets the number of input blocks (containing speech!) | 
|  | 33   // to elapse before a new gain computation is made. |variance_rate| specifies | 
|  | 34   // the number of gain recomputations after which the variances are reset. | 
|  | 35   // |cv_*| are parameters for the VarianceArray constructor for the | 
|  | 36   // clear speech stream. | 
|  | 37   // TODO(bercic): the |cv_*|, |*_rate| and |gain_limit| parameters should | 
|  | 38   // probably go away once fine tuning is done. They override the internal | 
|  | 39   // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate). | 
|  | 40   IntelligibilityEnhancer(int erb_resolution, int sample_rate_hz, int channels, | 
|  | 41                           int cv_type, float cv_alpha, int cv_win, | 
|  | 42                           int analysis_rate, int variance_rate, | 
|  | 43                           float gain_limit); | 
|  | 44   ~IntelligibilityEnhancer(); | 
|  | 45 | 
|  | 46   void ProcessRenderAudio(float* const* audio); | 
|  | 47   void ProcessCaptureAudio(float* const* audio); | 
|  | 48 | 
|  | 49  private: | 
|  | 50   enum AudioSource { | 
|  | 51     kRenderStream = 0, | 
|  | 52     kCaptureStream, | 
|  | 53   }; | 
|  | 54 | 
|  | 55   class TransformCallback : public LappedTransform::Callback { | 
|  | 56    public: | 
|  | 57     TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); | 
|  | 58     virtual void ProcessAudioBlock(const std::complex<float>* const* in_block, | 
|  | 59                                    int in_channels, int frames, | 
|  | 60                                    int out_channels, | 
|  | 61                                    std::complex<float>* const* out_block); | 
|  | 62 | 
|  | 63    private: | 
|  | 64     IntelligibilityEnhancer* parent_; | 
|  | 65     AudioSource source_; | 
|  | 66   }; | 
|  | 67   friend class TransformCallback; | 
|  | 68 | 
|  | 69   void DispatchAudio(AudioSource source, const std::complex<float>* in_block, | 
|  | 70                      std::complex<float>* out_block); | 
|  | 71   void ProcessClearBlock(const std::complex<float>* in_block, | 
|  | 72                          std::complex<float>* out_block); | 
|  | 73   void AnalyzeClearBlock(float power_target); | 
|  | 74   void ProcessNoiseBlock(const std::complex<float>* in_block, | 
|  | 75                          std::complex<float>* out_block); | 
|  | 76 | 
|  | 77   static int GetBankSize(int sample_rate, int erb_resolution); | 
|  | 78   void CreateErbBank(); | 
|  | 79   void SolveEquation14(float lambda, int start_freq, float* sols); | 
|  | 80   void FilterVariance(const float* var, float* result); | 
|  | 81   static float DotProduct(const float* a, const float* b, int length); | 
|  | 82 | 
|  | 83   static const int kErbResolution; | 
|  | 84   static const int kWindowSizeMs; | 
|  | 85   static const int kChunkSizeMs; | 
|  | 86   static const int kAnalyzeRate; | 
|  | 87   static const int kVarianceRate; | 
|  | 88   static const float kClipFreq; | 
|  | 89   static const float kConfigRho; | 
|  | 90   static const float kKbdAlpha; | 
|  | 91   static const float kGainChangeLimit; | 
|  | 92 | 
|  | 93   const int freqs_; | 
|  | 94   const int window_size_;  // window size in samples; also the block size | 
|  | 95   const int chunk_length_;  // chunk size in samples | 
|  | 96   const int bank_size_; | 
|  | 97   const int sample_rate_hz_; | 
|  | 98   const int erb_resolution_; | 
|  | 99   const int channels_; | 
|  | 100   const int analysis_rate_; | 
|  | 101   const int variance_rate_; | 
|  | 102 | 
|  | 103   intelligibility::VarianceArray clear_variance_; | 
|  | 104   intelligibility::VarianceArray noise_variance_; | 
|  | 105   scoped_ptr<float[]> filtered_clear_var_; | 
|  | 106   scoped_ptr<float[]> filtered_noise_var_; | 
|  | 107   float** filter_bank_; | 
|  | 108   scoped_ptr<float[]> center_freqs_; | 
|  | 109   int start_freq_; | 
|  | 110   scoped_ptr<float[]> rho_; | 
|  | 111   scoped_ptr<float[]> gains_eq_; | 
|  | 112   intelligibility::GainApplier gain_applier_; | 
|  | 113 | 
|  | 114   // Destination buffer used to reassemble blocked chunks before overwriting | 
|  | 115   // the original input array with modifications. | 
|  | 116   float** temp_out_buffer_; | 
|  | 117   scoped_ptr<float*[]> input_audio_; | 
|  | 118   scoped_ptr<float[]> kbd_window_; | 
|  | 119   TransformCallback render_callback_; | 
|  | 120   TransformCallback capture_callback_; | 
|  | 121   scoped_ptr<LappedTransform> render_mangler_; | 
|  | 122   scoped_ptr<LappedTransform> capture_mangler_; | 
|  | 123   int block_count_; | 
|  | 124   int analysis_step_; | 
|  | 125 | 
|  | 126   // TODO(bercic): Quick stopgap measure for voice detection in the clear | 
|  | 127   // and noise streams. | 
|  | 128   VadInst* vad_high_; | 
|  | 129   VadInst* vad_low_; | 
|  | 130   scoped_ptr<int16_t[]> vad_tmp_buffer_; | 
|  | 131   bool has_voice_low_; | 
|  | 132 }; | 
|  | 133 | 
|  | 134 }  // namespace webrtc | 
|  | 135 | 
|  | 136 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 
|  | 137 | 
| OLD | NEW | 
|---|