OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 29 matching lines...) Expand all Loading... |
40 : sample_rate_hz(16000), | 40 : sample_rate_hz(16000), |
41 num_capture_channels(1), | 41 num_capture_channels(1), |
42 num_render_channels(1), | 42 num_render_channels(1), |
43 var_type(intelligibility::VarianceArray::kStepDecaying), | 43 var_type(intelligibility::VarianceArray::kStepDecaying), |
44 var_decay_rate(0.9f), | 44 var_decay_rate(0.9f), |
45 var_window_size(10), | 45 var_window_size(10), |
46 analysis_rate(800), | 46 analysis_rate(800), |
47 gain_change_limit(0.1f), | 47 gain_change_limit(0.1f), |
48 rho(0.02f) {} | 48 rho(0.02f) {} |
49 int sample_rate_hz; | 49 int sample_rate_hz; |
50 int num_capture_channels; | 50 size_t num_capture_channels; |
51 int num_render_channels; | 51 size_t num_render_channels; |
52 intelligibility::VarianceArray::StepType var_type; | 52 intelligibility::VarianceArray::StepType var_type; |
53 float var_decay_rate; | 53 float var_decay_rate; |
54 size_t var_window_size; | 54 size_t var_window_size; |
55 int analysis_rate; | 55 int analysis_rate; |
56 float gain_change_limit; | 56 float gain_change_limit; |
57 float rho; | 57 float rho; |
58 }; | 58 }; |
59 | 59 |
60 explicit IntelligibilityEnhancer(const Config& config); | 60 explicit IntelligibilityEnhancer(const Config& config); |
61 IntelligibilityEnhancer(); // Initialize with default config. | 61 IntelligibilityEnhancer(); // Initialize with default config. |
62 | 62 |
63 // Reads and processes chunk of noise stream in time domain. | 63 // Reads and processes chunk of noise stream in time domain. |
64 void AnalyzeCaptureAudio(float* const* audio, | 64 void AnalyzeCaptureAudio(float* const* audio, |
65 int sample_rate_hz, | 65 int sample_rate_hz, |
66 int num_channels); | 66 size_t num_channels); |
67 | 67 |
68 // Reads chunk of speech in time domain and updates with modified signal. | 68 // Reads chunk of speech in time domain and updates with modified signal. |
69 void ProcessRenderAudio(float* const* audio, | 69 void ProcessRenderAudio(float* const* audio, |
70 int sample_rate_hz, | 70 int sample_rate_hz, |
71 int num_channels); | 71 size_t num_channels); |
72 bool active() const; | 72 bool active() const; |
73 | 73 |
74 private: | 74 private: |
75 enum AudioSource { | 75 enum AudioSource { |
76 kRenderStream = 0, // Clear speech stream. | 76 kRenderStream = 0, // Clear speech stream. |
77 kCaptureStream, // Noise stream. | 77 kCaptureStream, // Noise stream. |
78 }; | 78 }; |
79 | 79 |
80 // Provides access point to the frequency domain. | 80 // Provides access point to the frequency domain. |
81 class TransformCallback : public LappedTransform::Callback { | 81 class TransformCallback : public LappedTransform::Callback { |
82 public: | 82 public: |
83 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); | 83 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); |
84 | 84 |
85 // All in frequency domain, receives input |in_block|, applies | 85 // All in frequency domain, receives input |in_block|, applies |
86 // intelligibility enhancement, and writes result to |out_block|. | 86 // intelligibility enhancement, and writes result to |out_block|. |
87 void ProcessAudioBlock(const std::complex<float>* const* in_block, | 87 void ProcessAudioBlock(const std::complex<float>* const* in_block, |
88 int in_channels, | 88 size_t in_channels, |
89 size_t frames, | 89 size_t frames, |
90 int out_channels, | 90 size_t out_channels, |
91 std::complex<float>* const* out_block) override; | 91 std::complex<float>* const* out_block) override; |
92 | 92 |
93 private: | 93 private: |
94 IntelligibilityEnhancer* parent_; | 94 IntelligibilityEnhancer* parent_; |
95 AudioSource source_; | 95 AudioSource source_; |
96 }; | 96 }; |
97 friend class TransformCallback; | 97 friend class TransformCallback; |
98 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); | 98 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); |
99 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); | 99 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); |
100 | 100 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
137 | 137 |
138 // Returns dot product of vectors specified by size |length| arrays |a|,|b|. | 138 // Returns dot product of vectors specified by size |length| arrays |a|,|b|. |
139 static float DotProduct(const float* a, const float* b, size_t length); | 139 static float DotProduct(const float* a, const float* b, size_t length); |
140 | 140 |
141 const size_t freqs_; // Num frequencies in frequency domain. | 141 const size_t freqs_; // Num frequencies in frequency domain. |
142 const size_t window_size_; // Window size in samples; also the block size. | 142 const size_t window_size_; // Window size in samples; also the block size. |
143 const size_t chunk_length_; // Chunk size in samples. | 143 const size_t chunk_length_; // Chunk size in samples. |
144 const size_t bank_size_; // Num ERB filters. | 144 const size_t bank_size_; // Num ERB filters. |
145 const int sample_rate_hz_; | 145 const int sample_rate_hz_; |
146 const int erb_resolution_; | 146 const int erb_resolution_; |
147 const int num_capture_channels_; | 147 const size_t num_capture_channels_; |
148 const int num_render_channels_; | 148 const size_t num_render_channels_; |
149 const int analysis_rate_; // Num blocks before gains recalculated. | 149 const int analysis_rate_; // Num blocks before gains recalculated. |
150 | 150 |
151 const bool active_; // Whether render gains are being updated. | 151 const bool active_; // Whether render gains are being updated. |
152 // TODO(ekm): Add logic for updating |active_|. | 152 // TODO(ekm): Add logic for updating |active_|. |
153 | 153 |
154 intelligibility::VarianceArray clear_variance_; | 154 intelligibility::VarianceArray clear_variance_; |
155 intelligibility::VarianceArray noise_variance_; | 155 intelligibility::VarianceArray noise_variance_; |
156 rtc::scoped_ptr<float[]> filtered_clear_var_; | 156 rtc::scoped_ptr<float[]> filtered_clear_var_; |
157 rtc::scoped_ptr<float[]> filtered_noise_var_; | 157 rtc::scoped_ptr<float[]> filtered_noise_var_; |
158 std::vector<std::vector<float>> filter_bank_; | 158 std::vector<std::vector<float>> filter_bank_; |
(...skipping 14 matching lines...) Expand all Loading... |
173 TransformCallback capture_callback_; | 173 TransformCallback capture_callback_; |
174 rtc::scoped_ptr<LappedTransform> render_mangler_; | 174 rtc::scoped_ptr<LappedTransform> render_mangler_; |
175 rtc::scoped_ptr<LappedTransform> capture_mangler_; | 175 rtc::scoped_ptr<LappedTransform> capture_mangler_; |
176 int block_count_; | 176 int block_count_; |
177 int analysis_step_; | 177 int analysis_step_; |
178 }; | 178 }; |
179 | 179 |
180 } // namespace webrtc | 180 } // namespace webrtc |
181 | 181 |
182 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ | 182 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ |
OLD | NEW |