webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1672343002: Using the NS noise estimate for the IE

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1672343002: Using the NS noise estimate for the IE (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ns

Patch Set: CHECK instead of if-guarding Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

index 1eb22342ad07233f8b28c9abc0d55880b9ee8076..fade1449ccdb3886af3dae5154fef3ee6f176bac 100644

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

@@ -60,10 +60,8 @@ class IntelligibilityEnhancer {

explicit IntelligibilityEnhancer(const Config& config);

IntelligibilityEnhancer(); // Initialize with default config.

- // Reads and processes chunk of noise stream in time domain.

- void AnalyzeCaptureAudio(float* const* audio,

- int sample_rate_hz,

- size_t num_channels);

+ // Sets the capture noise magnitude spectrum estimate.

+ void SetCaptureNoiseEstimate(std::vector<float> noise);

// Reads chunk of speech in time domain and updates with modified signal.

void ProcessRenderAudio(float* const* audio,

@@ -72,15 +70,10 @@ class IntelligibilityEnhancer {

bool active() const;

private:

- enum AudioSource {

- kRenderStream = 0, // Clear speech stream.

- kCaptureStream, // Noise stream.

- };

// Provides access point to the frequency domain.

class TransformCallback : public LappedTransform::Callback {

public:

- TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);

+ TransformCallback(IntelligibilityEnhancer* parent);

// All in frequency domain, receives input |in_block|, applies

// intelligibility enhancement, and writes result to |out_block|.

@@ -92,17 +85,11 @@ class IntelligibilityEnhancer {

private:

IntelligibilityEnhancer* parent_;

- AudioSource source_;

};

friend class TransformCallback;

FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

- // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

- void DispatchAudio(AudioSource source,

- const std::complex<float>* in_block,

- std::complex<float>* out_block);

// Updates variance computation and analysis with |in_block_|,

// and writes modified speech to |out_block|.

void ProcessClearBlock(const std::complex<float>* in_block,

@@ -117,27 +104,16 @@ class IntelligibilityEnhancer {

// Transforms freq gains to ERB gains.

void UpdateErbGains();

- // Updates variance calculation for noise input with |in_block|.

- void ProcessNoiseBlock(const std::complex<float>* in_block,

- std::complex<float>* out_block);

// Returns number of ERB filters.

static size_t GetBankSize(int sample_rate, size_t erb_resolution);

// Initializes ERB filterbank.

- void CreateErbBank();

+ std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

// Analytically solves quadratic for optimal gains given |lambda|.

// Negative gains are set to 0. Stores the results in |sols|.

void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

- // Computes variance across ERB filters from freq variance |var|.

- // Stores in |result|.

- void FilterVariance(const float* var, float* result);

- // Returns dot product of vectors specified by size |length| arrays |a|,|b|.

- static float DotProduct(const float* a, const float* b, size_t length);

const size_t freqs_; // Num frequencies in frequency domain.

const size_t window_size_; // Window size in samples; also the block size.

const size_t chunk_length_; // Chunk size in samples.

@@ -152,11 +128,12 @@ class IntelligibilityEnhancer {

// TODO(ekm): Add logic for updating |active_|.

intelligibility::VarianceArray clear_variance_;

- intelligibility::VarianceArray noise_variance_;

+ std::vector<float> noise_power_;

rtc::scoped_ptr<float[]> filtered_clear_var_;

rtc::scoped_ptr<float[]> filtered_noise_var_;

- std::vector<std::vector<float>> filter_bank_;

rtc::scoped_ptr<float[]> center_freqs_;

+ std::vector<std::vector<float>> capture_filter_bank_;

+ std::vector<std::vector<float>> render_filter_bank_;

size_t start_freq_;

rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

// for each ERB band.

@@ -166,13 +143,10 @@ class IntelligibilityEnhancer {

// Destination buffers used to reassemble blocked chunks before overwriting

// the original input array with modifications.

ChannelBuffer<float> temp_render_out_buffer_;

- ChannelBuffer<float> temp_capture_out_buffer_;

rtc::scoped_ptr<float[]> kbd_window_;

TransformCallback render_callback_;

- TransformCallback capture_callback_;

rtc::scoped_ptr<LappedTransform> render_mangler_;

- rtc::scoped_ptr<LappedTransform> capture_mangler_;

int block_count_;

int analysis_step_;

};