webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1672343002: Using the NS noise estimate for the IE

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1672343002: Using the NS noise estimate for the IE (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ns

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

index 1eb22342ad07233f8b28c9abc0d55880b9ee8076..bf22e9c2ac1f91d0d7c04de05d02a580567f9dae 100644

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

@@ -60,10 +60,8 @@ class IntelligibilityEnhancer {

explicit IntelligibilityEnhancer(const Config& config);

IntelligibilityEnhancer(); // Initialize with default config.

- // Reads and processes chunk of noise stream in time domain.

- void AnalyzeCaptureAudio(float* const* audio,

- int sample_rate_hz,

- size_t num_channels);

+ // Sets the capture noise estimate.

hlundin-webrtc 2016/02/08 10:29:28 "Noise estimate" is a bit generic. It is the noise [… comment truncated in collapsed view]

aluebs-webrtc 2016/02/09 00:19:15 Right. Improved the comment.

+ void SetCaptureNoiseEstimate(const std::vector<float>& noise);

// Reads chunk of speech in time domain and updates with modified signal.

void ProcessRenderAudio(float* const* audio,

@@ -72,15 +70,10 @@ class IntelligibilityEnhancer {

bool active() const;

private:

- enum AudioSource {

- kRenderStream = 0, // Clear speech stream.

- kCaptureStream, // Noise stream.

- };

// Provides access point to the frequency domain.

class TransformCallback : public LappedTransform::Callback {

public:

- TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);

+ TransformCallback(IntelligibilityEnhancer* parent);

// All in frequency domain, receives input |in_block|, applies

// intelligibility enhancement, and writes result to |out_block|.

@@ -92,17 +85,11 @@ class IntelligibilityEnhancer {

private:

IntelligibilityEnhancer* parent_;

- AudioSource source_;

};

friend class TransformCallback;

FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

- // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

- void DispatchAudio(AudioSource source,

- const std::complex<float>* in_block,

- std::complex<float>* out_block);

// Updates variance computation and analysis with |in_block|,

// and writes modified speech to |out_block|.

void ProcessClearBlock(const std::complex<float>* in_block,

@@ -117,15 +104,11 @@ class IntelligibilityEnhancer {

// Transforms freq gains to ERB gains.

void UpdateErbGains();

- // Updates variance calculation for noise input with |in_block|.

- void ProcessNoiseBlock(const std::complex<float>* in_block,

- std::complex<float>* out_block);

// Returns number of ERB filters.

static size_t GetBankSize(int sample_rate, size_t erb_resolution);

// Initializes ERB filterbank.

- void CreateErbBank();

+ std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

// Analytically solves quadratic for optimal gains given |lambda|.

// Negative gains are set to 0. Stores the results in |sols|.

@@ -133,7 +116,9 @@ class IntelligibilityEnhancer {

// Computes variance across ERB filters from freq variance |var|.

// Stores in |result|.

- void FilterVariance(const float* var, float* result);

+ void FilterVariance(const float* var,

+ const std::vector<std::vector<float>>& filter_bank,

+ float* result);

// Returns dot product of vectors specified by size |length| arrays |a|,|b|.

static float DotProduct(const float* a, const float* b, size_t length);

@@ -152,11 +137,12 @@ class IntelligibilityEnhancer {

// TODO(ekm): Add logic for updating |active_|.

intelligibility::VarianceArray clear_variance_;

- intelligibility::VarianceArray noise_variance_;

+ std::vector<float> noise_power_;

rtc::scoped_ptr<float[]> filtered_clear_var_;

rtc::scoped_ptr<float[]> filtered_noise_var_;

- std::vector<std::vector<float>> filter_bank_;

rtc::scoped_ptr<float[]> center_freqs_;

+ std::vector<std::vector<float>> capture_filter_bank_;

+ std::vector<std::vector<float>> render_filter_bank_;

size_t start_freq_;

rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

// for each ERB band.

@@ -166,13 +152,10 @@ class IntelligibilityEnhancer {

// Destination buffers used to reassemble blocked chunks before overwriting

// the original input array with modifications.

ChannelBuffer<float> temp_render_out_buffer_;

- ChannelBuffer<float> temp_capture_out_buffer_;

rtc::scoped_ptr<float[]> kbd_window_;

TransformCallback render_callback_;

- TransformCallback capture_callback_;

rtc::scoped_ptr<LappedTransform> render_mangler_;

- rtc::scoped_ptr<LappedTransform> capture_mangler_;

int block_count_;

int analysis_step_;

};