webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1878133002: Disable Intelligibility Enhancer for high SNRs

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1878133002: Disable Intelligibility Enhancer for high SNRs (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

index de36b7a8bc75c943fb436326b8f067e4ca948215..23dca2631f2d96cb67a791cb7727c0ad2ed8b76f 100644

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

@@ -38,6 +38,8 @@ const float kDecayRate = 0.994f; // Power estimation decay rate.

const float kMaxRelativeGainChange = 0.006f;

const float kRho = 0.0004f; // Default production and interpretation SNR.

const float kPowerNormalizationFactor = 1.f / (1 << 30);

+const float kMaxActiveSNR = 128.f; // 21dB

+const float kMinInactiveSNR = 32.f; // 15dB

// Returns dot product of vectors |a| and |b| with size |length|.

float DotProduct(const float* a, const float* b, size_t length) {

@@ -84,6 +86,8 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,

audio_s16_(chunk_length_),

chunks_since_voice_(kSpeechOffsetDelay),

is_speech_(false),

+ snr_(kMaxActiveSNR),

+ is_active_(false),

noise_estimation_buffer_(num_noise_bins),

noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,

std::vector<float>(num_noise_bins),

@@ -135,29 +139,54 @@ void IntelligibilityEnhancer::ProcessAudioBlock(

if (is_speech_) {

clear_power_estimator_.Step(in_block[0]);

}

- const std::vector<float>& clear_power = clear_power_estimator_.power();

- const std::vector<float>& noise_power = noise_power_estimator_.power();

- MapToErbBands(clear_power.data(), render_filter_bank_,

- filtered_clear_pow_.data());

- MapToErbBands(noise_power.data(), capture_filter_bank_,

- filtered_noise_pow_.data());

- SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

- const float power_target = std::accumulate(

- filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);

- const float power_top =

- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

- SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

- const float power_bot =

- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

- if (power_target >= power_bot && power_target <= power_top) {

- SolveForLambda(power_target);

- UpdateErbGains();

- } // Else experiencing power underflow, so do nothing.

+ UpdateActivity();

+ if (is_active_) {

+ MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,

+ filtered_clear_pow_.data());

+ MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,

+ filtered_noise_pow_.data());

+ SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

+ const float power_target = std::accumulate(

+ filtered_clear_pow_.data(),

+ filtered_clear_pow_.data() + bank_size_,

+ 0.f);

+ const float power_top =

+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

+ SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

+ const float power_bot =

+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

+ if (power_target >= power_bot && power_target <= power_top) {

+ SolveForLambda(power_target);

+ UpdateErbGains();

+ } // Else experiencing power underflow, so do nothing.

+ }

for (size_t i = 0; i < in_channels; ++i) {

gain_applier_.Apply(in_block[i], out_block[i]);

}

+void IntelligibilityEnhancer::UpdateActivity() {

peah-webrtc 2016/04/12 13:39:21 What you are updating here is the is_active flag a…

aluebs-webrtc 2016/04/12 18:34:28 Yes, your understanding is completely right. And I…

+ const float* clear_psd = clear_power_estimator_.power().data();

+ const float* noise_psd = noise_power_estimator_.power().data();

+ const float clear_power =

+ std::accumulate(clear_psd, clear_psd + freqs_, 0.f);

+ const float noise_power =

+ std::accumulate(noise_psd, noise_psd + freqs_, 0.f);

+ snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power / noise_power;

peah-webrtc 2016/04/12 13:39:21 This SNR estimate is an average of the instantaneo…

peah-webrtc 2016/04/12 13:39:21 This SNR estimate is assuming that the ratio of th…

aluebs-webrtc 2016/04/12 18:34:28 That is an interesting point. Because the PSDs are…

aluebs-webrtc 2016/04/12 18:34:28 As discussed offline at the beginning of this proj…

+ if (is_active_) {

+ if (snr_ > kMaxActiveSNR) {

+ is_active_ = false;

+ // Set the target gains to unity.

+ float* gains = gain_applier_.target();

+ for (size_t i = 0; i < freqs_; ++i) {

+ gains[i] = 1.f;

+ }

+ } else {

+ is_active_ = snr_ < kMinInactiveSNR;

+ }

void IntelligibilityEnhancer::SolveForLambda(float power_target) {

const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values

const int kMaxIters = 100; // for these, based on experiments.

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »