Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
index 3b46d16afeff4e6296ab61d8bc80f57435ab16ea..1f2b9843fec4894f2b7b8b963fe2357eae605f93 100644 |
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h |
@@ -17,7 +17,9 @@ |
#include "webrtc/common_audio/lapped_transform.h" |
#include "webrtc/common_audio/channel_buffer.h" |
+#include "webrtc/common_audio/swap_queue.h" |
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" |
+#include "webrtc/modules/audio_processing/processing_component.h" |
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" |
namespace webrtc { |
@@ -29,7 +31,9 @@ namespace webrtc { |
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 |
class IntelligibilityEnhancer : public LappedTransform::Callback { |
public: |
- IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels); |
+ IntelligibilityEnhancer(int sample_rate_hz, |
+ size_t num_render_channels, |
+ size_t num_noise_bins); |
// Sets the capture noise magnitude spectrum estimate. |
void SetCaptureNoiseEstimate(std::vector<float> noise); |
@@ -72,15 +76,19 @@ class IntelligibilityEnhancer : public LappedTransform::Callback { |
// Returns true if the audio is speech. |
bool IsSpeech(const float* audio); |
+ // TODO(aluebs): Decrease this once we properly handle hugely unbalanced |
+ // reverse and forward call numbers. |
+ static const size_t kMaxNumFramesToBuffer = 100; |
+ |
peah-webrtc
2016/03/08 07:02:38
The size of this buffer seems a bit high. The reas
peah-webrtc
2016/03/08 07:02:38
Please rename as it is not frames that are buffere
aluebs-webrtc
2016/03/08 10:53:00
Done.
aluebs-webrtc
2016/03/08 10:53:00
That makes a lot of sense. I just copied and paste
|
const size_t freqs_; // Num frequencies in frequency domain. |
+ const size_t num_noise_bins_; |
const size_t chunk_length_; // Chunk size in samples. |
const size_t bank_size_; // Num ERB filters. |
const int sample_rate_hz_; |
const size_t num_render_channels_; |
intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_; |
- std::unique_ptr<intelligibility::PowerEstimator<float>> |
- noise_power_estimator_; |
+ intelligibility::PowerEstimator<float> noise_power_estimator_; |
std::vector<float> filtered_clear_pow_; |
std::vector<float> filtered_noise_pow_; |
std::vector<float> center_freqs_; |
@@ -97,6 +105,10 @@ class IntelligibilityEnhancer : public LappedTransform::Callback { |
std::vector<int16_t> audio_s16_; |
size_t chunks_since_voice_; |
bool is_speech_; |
+ |
+ std::vector<float> noise_estimation_buffer_; |
+ rtc::scoped_ptr<SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>> |
+ noise_estimation_queue_; |
}; |
} // namespace webrtc |