webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1187033005: Revert of Allow intelligibility to compile in apm

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1187033005: Revert of Allow intelligibility to compile in apm (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_tests.gypi ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

index 8125707f120981c40817152a80bcbea43f2e3006..d0818f688c5e6750b430dd81a429ccb3432a7f10 100644

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

@@ -8,18 +8,14 @@

* be found in the AUTHORS file in the root of the source tree.

-//

-// Specifies core class for intelligbility enhancement.

-//

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

#include <complex>

-#include "webrtc/base/scoped_ptr.h"

#include "webrtc/common_audio/lapped_transform.h"

#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

+#include "webrtc/system_wrappers/interface/scoped_ptr.h"

struct WebRtcVadInst;

typedef struct WebRtcVadInst VadInst;

@@ -29,7 +25,6 @@

// Speech intelligibility enhancement module. Reads render and capture

// audio streams and modifies the render stream with a set of gains per

// frequency bin to enhance speech against the noise background.

-// Note: assumes speech and noise streams are already separated.

class IntelligibilityEnhancer {

public:

// Construct a new instance with the given filter bank resolution,

@@ -38,43 +33,30 @@

// to elapse before a new gain computation is made. |variance_rate| specifies

// the number of gain recomputations after which the variances are reset.

// |cv_*| are parameters for the VarianceArray constructor for the

- // clear speech stream.

+ // clear speech stream.

// TODO(bercic): the |cv_*|, |*_rate| and |gain_limit| parameters should

// probably go away once fine tuning is done. They override the internal

// constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).

- IntelligibilityEnhancer(int erb_resolution,

- int sample_rate_hz,

- int channels,

- int cv_type,

- float cv_alpha,

- int cv_win,

- int analysis_rate,

- int variance_rate,

+ IntelligibilityEnhancer(int erb_resolution, int sample_rate_hz, int channels,

+ int cv_type, float cv_alpha, int cv_win,

+ int analysis_rate, int variance_rate,

float gain_limit);

~IntelligibilityEnhancer();

- // Reads and processes chunk of noise stream in time domain.

+ void ProcessRenderAudio(float* const* audio);

void ProcessCaptureAudio(float* const* audio);

- // Reads chunk of speech in time domain and updates with modified signal.

- void ProcessRenderAudio(float* const* audio);

private:

enum AudioSource {

- kRenderStream = 0, // Clear speech stream.

- kCaptureStream, // Noise stream.

+ kRenderStream = 0,

+ kCaptureStream,

};

- // Provides access point to the frequency domain.

class TransformCallback : public LappedTransform::Callback {

public:

TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);

- // All in frequency domain, receives input |in_block|, applies

- // intelligibility enhancement, and writes result to |out_block|.

virtual void ProcessAudioBlock(const std::complex<float>* const* in_block,

- int in_channels,

- int frames,

+ int in_channels, int frames,

int out_channels,

std::complex<float>* const* out_block);

@@ -84,95 +66,72 @@

};

friend class TransformCallback;

- // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

- void DispatchAudio(AudioSource source,

- const std::complex<float>* in_block,

+ void DispatchAudio(AudioSource source, const std::complex<float>* in_block,

std::complex<float>* out_block);

- // Updates variance computation and analysis with |in_block_|,

- // and writes modified speech to |out_block|.

void ProcessClearBlock(const std::complex<float>* in_block,

std::complex<float>* out_block);

- // Computes and sets modified gains.

void AnalyzeClearBlock(float power_target);

- // Updates variance calculation for noise input with |in_block|.

void ProcessNoiseBlock(const std::complex<float>* in_block,

std::complex<float>* out_block);

- // Returns number of ERB filters.

static int GetBankSize(int sample_rate, int erb_resolution);

- // Initializes ERB filterbank.

void CreateErbBank();

- // Analytically solves quadratic for optimal gains given |lambda|.

- // Negative gains are set to 0. Stores the results in |sols|.

- void SolveForGainsGivenLambda(float lambda, int start_freq, float* sols);

- // Computes variance across ERB filters from freq variance |var|.

- // Stores in |result|.

+ void SolveEquation14(float lambda, int start_freq, float* sols);

void FilterVariance(const float* var, float* result);

- // Returns dot product of vectors specified by size |length| arrays |a|,|b|.

static float DotProduct(const float* a, const float* b, int length);

static const int kErbResolution;

static const int kWindowSizeMs;

static const int kChunkSizeMs;

- static const int kAnalyzeRate; // Default for |analysis_rate_|.

- static const int kVarianceRate; // Default for |variance_rate_|.

+ static const int kAnalyzeRate;

+ static const int kVarianceRate;

static const float kClipFreq;

- static const float kConfigRho; // Default production and interpretation SNR.

+ static const float kConfigRho;

static const float kKbdAlpha;

static const float kGainChangeLimit;

- const int freqs_; // Num frequencies in frequency domain.

- const int window_size_; // Window size in samples; also the block size.

- const int chunk_length_; // Chunk size in samples.

- const int bank_size_; // Num ERB filters.

+ const int freqs_;

+ const int window_size_; // window size in samples; also the block size

+ const int chunk_length_; // chunk size in samples

+ const int bank_size_;

const int sample_rate_hz_;

const int erb_resolution_;

- const int channels_; // Num channels.

- const int analysis_rate_; // Num blocks before gains recalculated.

- const int variance_rate_; // Num recalculations before history is cleared.

+ const int channels_;

+ const int analysis_rate_;

+ const int variance_rate_;

intelligibility::VarianceArray clear_variance_;

intelligibility::VarianceArray noise_variance_;

- rtc::scoped_ptr<float[]> filtered_clear_var_;

- rtc::scoped_ptr<float[]> filtered_noise_var_;

- float** filter_bank_; // TODO(ekmeyerson): Switch to using ChannelBuffer.

- rtc::scoped_ptr<float[]> center_freqs_;

+ scoped_ptr<float[]> filtered_clear_var_;

+ scoped_ptr<float[]> filtered_noise_var_;

+ float** filter_bank_;

+ scoped_ptr<float[]> center_freqs_;

int start_freq_;

- rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

- // for each ERB band.

- rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

+ scoped_ptr<float[]> rho_;

+ scoped_ptr<float[]> gains_eq_;

intelligibility::GainApplier gain_applier_;

// Destination buffer used to reassemble blocked chunks before overwriting

// the original input array with modifications.

- // TODO(ekmeyerson): Switch to using ChannelBuffer.

float** temp_out_buffer_;

- rtc::scoped_ptr<float* []> input_audio_;

- rtc::scoped_ptr<float[]> kbd_window_;

+ scoped_ptr<float*[]> input_audio_;

+ scoped_ptr<float[]> kbd_window_;

TransformCallback render_callback_;

TransformCallback capture_callback_;

- rtc::scoped_ptr<LappedTransform> render_mangler_;

- rtc::scoped_ptr<LappedTransform> capture_mangler_;

+ scoped_ptr<LappedTransform> render_mangler_;

+ scoped_ptr<LappedTransform> capture_mangler_;

int block_count_;

int analysis_step_;

// TODO(bercic): Quick stopgap measure for voice detection in the clear

// and noise streams.

- // Note: VAD currently does not affect anything in IntelligibilityEnhancer.

VadInst* vad_high_;

VadInst* vad_low_;

- rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;

- bool has_voice_low_; // Whether voice detected in speech stream.

+ scoped_ptr<int16_t[]> vad_tmp_buffer_;

+ bool has_voice_low_;

};

} // namespace webrtc

#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_