Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(106)

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1672343002: Using the NS noise estimate for the IE (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ns
Patch Set: CHECK instead of if-guarding Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
index 1eb22342ad07233f8b28c9abc0d55880b9ee8076..fade1449ccdb3886af3dae5154fef3ee6f176bac 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -60,10 +60,8 @@ class IntelligibilityEnhancer {
explicit IntelligibilityEnhancer(const Config& config);
IntelligibilityEnhancer(); // Initialize with default config.
- // Reads and processes chunk of noise stream in time domain.
- void AnalyzeCaptureAudio(float* const* audio,
- int sample_rate_hz,
- size_t num_channels);
+ // Sets the capture noise magnitude spectrum estimate.
+ void SetCaptureNoiseEstimate(std::vector<float> noise);
// Reads chunk of speech in time domain and updates with modified signal.
void ProcessRenderAudio(float* const* audio,
@@ -72,15 +70,10 @@ class IntelligibilityEnhancer {
bool active() const;
private:
- enum AudioSource {
- kRenderStream = 0, // Clear speech stream.
- kCaptureStream, // Noise stream.
- };
-
// Provides access point to the frequency domain.
class TransformCallback : public LappedTransform::Callback {
public:
- TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);
+ TransformCallback(IntelligibilityEnhancer* parent);
// All in frequency domain, receives input |in_block|, applies
// intelligibility enhancement, and writes result to |out_block|.
@@ -92,17 +85,11 @@ class IntelligibilityEnhancer {
private:
IntelligibilityEnhancer* parent_;
- AudioSource source_;
};
friend class TransformCallback;
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
- // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.
- void DispatchAudio(AudioSource source,
- const std::complex<float>* in_block,
- std::complex<float>* out_block);
-
// Updates variance computation and analysis with |in_block|,
// and writes modified speech to |out_block|.
void ProcessClearBlock(const std::complex<float>* in_block,
@@ -117,27 +104,16 @@ class IntelligibilityEnhancer {
// Transforms freq gains to ERB gains.
void UpdateErbGains();
- // Updates variance calculation for noise input with |in_block|.
- void ProcessNoiseBlock(const std::complex<float>* in_block,
- std::complex<float>* out_block);
-
// Returns number of ERB filters.
static size_t GetBankSize(int sample_rate, size_t erb_resolution);
// Initializes ERB filterbank.
- void CreateErbBank();
+ std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
// Analytically solves quadratic for optimal gains given |lambda|.
// Negative gains are set to 0. Stores the results in |sols|.
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
- // Computes variance across ERB filters from freq variance |var|.
- // Stores in |result|.
- void FilterVariance(const float* var, float* result);
-
- // Returns dot product of vectors specified by size |length| arrays |a|,|b|.
- static float DotProduct(const float* a, const float* b, size_t length);
-
const size_t freqs_; // Num frequencies in frequency domain.
const size_t window_size_; // Window size in samples; also the block size.
const size_t chunk_length_; // Chunk size in samples.
@@ -152,11 +128,12 @@ class IntelligibilityEnhancer {
// TODO(ekm): Add logic for updating |active_|.
intelligibility::VarianceArray clear_variance_;
- intelligibility::VarianceArray noise_variance_;
+ std::vector<float> noise_power_;
rtc::scoped_ptr<float[]> filtered_clear_var_;
rtc::scoped_ptr<float[]> filtered_noise_var_;
- std::vector<std::vector<float>> filter_bank_;
rtc::scoped_ptr<float[]> center_freqs_;
+ std::vector<std::vector<float>> capture_filter_bank_;
+ std::vector<std::vector<float>> render_filter_bank_;
size_t start_freq_;
rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR
// for each ERB band.
@@ -166,13 +143,10 @@ class IntelligibilityEnhancer {
// Destination buffers used to reassemble blocked chunks before overwriting
// the original input array with modifications.
ChannelBuffer<float> temp_render_out_buffer_;
- ChannelBuffer<float> temp_capture_out_buffer_;
rtc::scoped_ptr<float[]> kbd_window_;
TransformCallback render_callback_;
- TransformCallback capture_callback_;
rtc::scoped_ptr<LappedTransform> render_mangler_;
- rtc::scoped_ptr<LappedTransform> capture_mangler_;
int block_count_;
int analysis_step_;
};

Powered by Google App Engine
This is Rietveld 408576698