| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 42 matching lines...) |
| 53 float var_decay_rate; | 53 float var_decay_rate; |
| 54 size_t var_window_size; | 54 size_t var_window_size; |
| 55 int analysis_rate; | 55 int analysis_rate; |
| 56 float gain_change_limit; | 56 float gain_change_limit; |
| 57 float rho; | 57 float rho; |
| 58 }; | 58 }; |
| 59 | 59 |
| 60 explicit IntelligibilityEnhancer(const Config& config); | 60 explicit IntelligibilityEnhancer(const Config& config); |
| 61 IntelligibilityEnhancer(); // Initialize with default config. | 61 IntelligibilityEnhancer(); // Initialize with default config. |
| 62 | 62 |
| 63 // Reads and processes chunk of noise stream in time domain. | 63 // Sets the capture noise magnitude spectrum estimate. |
| 64 void AnalyzeCaptureAudio(float* const* audio, | 64 void SetCaptureNoiseEstimate(std::vector<float> noise); |
| 65 int sample_rate_hz, | |
| 66 size_t num_channels); | |
| 67 | 65 |
| 68 // Reads chunk of speech in time domain and updates with modified signal. | 66 // Reads chunk of speech in time domain and updates with modified signal. |
| 69 void ProcessRenderAudio(float* const* audio, | 67 void ProcessRenderAudio(float* const* audio, |
| 70 int sample_rate_hz, | 68 int sample_rate_hz, |
| 71 size_t num_channels); | 69 size_t num_channels); |
| 72 bool active() const; | 70 bool active() const; |
| 73 | 71 |
| 74 private: | 72 private: |
| 75 enum AudioSource { | |
| 76 kRenderStream = 0, // Clear speech stream. | |
| 77 kCaptureStream, // Noise stream. | |
| 78 }; | |
| 79 | |
| 80 // Provides access point to the frequency domain. | 73 // Provides access point to the frequency domain. |
| 81 class TransformCallback : public LappedTransform::Callback { | 74 class TransformCallback : public LappedTransform::Callback { |
| 82 public: | 75 public: |
| 83 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); | 76 TransformCallback(IntelligibilityEnhancer* parent); |
| 84 | 77 |
| 85 // All in frequency domain, receives input |in_block|, applies | 78 // All in frequency domain, receives input |in_block|, applies |
| 86 // intelligibility enhancement, and writes result to |out_block|. | 79 // intelligibility enhancement, and writes result to |out_block|. |
| 87 void ProcessAudioBlock(const std::complex<float>* const* in_block, | 80 void ProcessAudioBlock(const std::complex<float>* const* in_block, |
| 88 size_t in_channels, | 81 size_t in_channels, |
| 89 size_t frames, | 82 size_t frames, |
| 90 size_t out_channels, | 83 size_t out_channels, |
| 91 std::complex<float>* const* out_block) override; | 84 std::complex<float>* const* out_block) override; |
| 92 | 85 |
| 93 private: | 86 private: |
| 94 IntelligibilityEnhancer* parent_; | 87 IntelligibilityEnhancer* parent_; |
| 95 AudioSource source_; | |
| 96 }; | 88 }; |
| 97 friend class TransformCallback; | 89 friend class TransformCallback; |
| 98 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); | 90 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); |
| 99 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); | 91 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); |
| 100 | 92 |
| 101 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source. | |
| 102 void DispatchAudio(AudioSource source, | |
| 103 const std::complex<float>* in_block, | |
| 104 std::complex<float>* out_block); | |
| 105 | |
| 106 // Updates variance computation and analysis with |in_block|, | 93 // Updates variance computation and analysis with |in_block|, |
| 107 // and writes modified speech to |out_block|. | 94 // and writes modified speech to |out_block|. |
| 108 void ProcessClearBlock(const std::complex<float>* in_block, | 95 void ProcessClearBlock(const std::complex<float>* in_block, |
| 109 std::complex<float>* out_block); | 96 std::complex<float>* out_block); |
| 110 | 97 |
| 111 // Computes and sets modified gains. | 98 // Computes and sets modified gains. |
| 112 void AnalyzeClearBlock(float power_target); | 99 void AnalyzeClearBlock(float power_target); |
| 113 | 100 |
| 114 // Bisection search for optimal |lambda|. | 101 // Bisection search for optimal |lambda|. |
| 115 void SolveForLambda(float power_target, float power_bot, float power_top); | 102 void SolveForLambda(float power_target, float power_bot, float power_top); |
| 116 | 103 |
| 117 // Transforms freq gains to ERB gains. | 104 // Transforms freq gains to ERB gains. |
| 118 void UpdateErbGains(); | 105 void UpdateErbGains(); |
| 119 | 106 |
| 120 // Updates variance calculation for noise input with |in_block|. | |
| 121 void ProcessNoiseBlock(const std::complex<float>* in_block, | |
| 122 std::complex<float>* out_block); | |
| 123 | |
| 124 // Returns number of ERB filters. | 107 // Returns number of ERB filters. |
| 125 static size_t GetBankSize(int sample_rate, size_t erb_resolution); | 108 static size_t GetBankSize(int sample_rate, size_t erb_resolution); |
| 126 | 109 |
| 127 // Initializes ERB filterbank. | 110 // Initializes ERB filterbank. |
| 128 void CreateErbBank(); | 111 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs); |
| 129 | 112 |
| 130 // Analytically solves quadratic for optimal gains given |lambda|. | 113 // Analytically solves quadratic for optimal gains given |lambda|. |
| 131 // Negative gains are set to 0. Stores the results in |sols|. | 114 // Negative gains are set to 0. Stores the results in |sols|. |
| 132 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); | 115 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); |
| 133 | 116 |
| 134 // Computes variance across ERB filters from freq variance |var|. | |
| 135 // Stores in |result|. | |
| 136 void FilterVariance(const float* var, float* result); | |
| 137 | |
| 138 // Returns dot product of vectors specified by size |length| arrays |a|,|b|. | |
| 139 static float DotProduct(const float* a, const float* b, size_t length); | |
| 140 | |
| 141 const size_t freqs_; // Num frequencies in frequency domain. | 117 const size_t freqs_; // Num frequencies in frequency domain. |
| 142 const size_t window_size_; // Window size in samples; also the block size. | 118 const size_t window_size_; // Window size in samples; also the block size. |
| 143 const size_t chunk_length_; // Chunk size in samples. | 119 const size_t chunk_length_; // Chunk size in samples. |
| 144 const size_t bank_size_; // Num ERB filters. | 120 const size_t bank_size_; // Num ERB filters. |
| 145 const int sample_rate_hz_; | 121 const int sample_rate_hz_; |
| 146 const int erb_resolution_; | 122 const int erb_resolution_; |
| 147 const size_t num_capture_channels_; | 123 const size_t num_capture_channels_; |
| 148 const size_t num_render_channels_; | 124 const size_t num_render_channels_; |
| 149 const int analysis_rate_; // Num blocks before gains recalculated. | 125 const int analysis_rate_; // Num blocks before gains recalculated. |
| 150 | 126 |
| 151 const bool active_; // Whether render gains are being updated. | 127 const bool active_; // Whether render gains are being updated. |
| 152 // TODO(ekm): Add logic for updating |active_|. | 128 // TODO(ekm): Add logic for updating |active_|. |
| 153 | 129 |
| 154 intelligibility::VarianceArray clear_variance_; | 130 intelligibility::VarianceArray clear_variance_; |
| 155 intelligibility::VarianceArray noise_variance_; | |
| 156 rtc::scoped_ptr<float[]> filtered_clear_var_; | 131 rtc::scoped_ptr<float[]> filtered_clear_var_; |
| 157 rtc::scoped_ptr<float[]> filtered_noise_var_; | 132 rtc::scoped_ptr<float[]> filtered_noise_var_; |
| 158 std::vector<std::vector<float>> filter_bank_; | |
| 159 rtc::scoped_ptr<float[]> center_freqs_; | 133 rtc::scoped_ptr<float[]> center_freqs_; |
| 134 std::vector<std::vector<float>> capture_filter_bank_; |
| 135 std::vector<std::vector<float>> render_filter_bank_; |
| 160 size_t start_freq_; | 136 size_t start_freq_; |
| 161 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR. | 137 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR. |
| 162 // for each ERB band. | 138 // for each ERB band. |
| 163 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains. | 139 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains. |
| 164 intelligibility::GainApplier gain_applier_; | 140 intelligibility::GainApplier gain_applier_; |
| 165 | 141 |
| 166 // Destination buffers used to reassemble blocked chunks before overwriting | 142 // Destination buffers used to reassemble blocked chunks before overwriting |
| 167 // the original input array with modifications. | 143 // the original input array with modifications. |
| 168 ChannelBuffer<float> temp_render_out_buffer_; | 144 ChannelBuffer<float> temp_render_out_buffer_; |
| 169 ChannelBuffer<float> temp_capture_out_buffer_; | |
| 170 | 145 |
| 171 rtc::scoped_ptr<float[]> kbd_window_; | 146 rtc::scoped_ptr<float[]> kbd_window_; |
| 172 TransformCallback render_callback_; | 147 TransformCallback render_callback_; |
| 173 TransformCallback capture_callback_; | |
| 174 rtc::scoped_ptr<LappedTransform> render_mangler_; | 148 rtc::scoped_ptr<LappedTransform> render_mangler_; |
| 175 rtc::scoped_ptr<LappedTransform> capture_mangler_; | |
| 176 int block_count_; | 149 int block_count_; |
| 177 int analysis_step_; | 150 int analysis_step_; |
| 178 }; | 151 }; |
| 179 | 152 |
| 180 } // namespace webrtc | 153 } // namespace webrtc |
| 181 | 154 |
| 182 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 155 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ |
| OLD | NEW |