Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1878133002: Disable Intelligibility Enhancer for high SNRs (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index de36b7a8bc75c943fb436326b8f067e4ca948215..23dca2631f2d96cb67a791cb7727c0ad2ed8b76f 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -38,6 +38,8 @@ const float kDecayRate = 0.994f; // Power estimation decay rate.
const float kMaxRelativeGainChange = 0.006f;
const float kRho = 0.0004f; // Default production and interpretation SNR.
const float kPowerNormalizationFactor = 1.f / (1 << 30);
+const float kMaxActiveSNR = 128.f; // 21dB
+const float kMinInactiveSNR = 32.f; // 15dB
// Returns dot product of vectors |a| and |b| with size |length|.
float DotProduct(const float* a, const float* b, size_t length) {
@@ -84,6 +86,8 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
audio_s16_(chunk_length_),
chunks_since_voice_(kSpeechOffsetDelay),
is_speech_(false),
+ snr_(kMaxActiveSNR),
+ is_active_(false),
noise_estimation_buffer_(num_noise_bins),
noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
std::vector<float>(num_noise_bins),
@@ -135,29 +139,54 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
if (is_speech_) {
clear_power_estimator_.Step(in_block[0]);
}
- const std::vector<float>& clear_power = clear_power_estimator_.power();
- const std::vector<float>& noise_power = noise_power_estimator_.power();
- MapToErbBands(clear_power.data(), render_filter_bank_,
- filtered_clear_pow_.data());
- MapToErbBands(noise_power.data(), capture_filter_bank_,
- filtered_noise_pow_.data());
- SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
- const float power_target = std::accumulate(
- filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);
- const float power_top =
- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
- SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
- const float power_bot =
- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
- if (power_target >= power_bot && power_target <= power_top) {
- SolveForLambda(power_target);
- UpdateErbGains();
- } // Else experiencing power underflow, so do nothing.
+ UpdateActivity();
+ if (is_active_) {
+ MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
+ filtered_clear_pow_.data());
+ MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
+ filtered_noise_pow_.data());
+ SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
+ const float power_target = std::accumulate(
+ filtered_clear_pow_.data(),
+ filtered_clear_pow_.data() + bank_size_,
+ 0.f);
+ const float power_top =
+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+ SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
+ const float power_bot =
+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+ if (power_target >= power_bot && power_target <= power_top) {
+ SolveForLambda(power_target);
+ UpdateErbGains();
+ } // Else experiencing power underflow, so do nothing.
+ }
for (size_t i = 0; i < in_channels; ++i) {
gain_applier_.Apply(in_block[i], out_block[i]);
}
}
+void IntelligibilityEnhancer::UpdateActivity() {
peah-webrtc 2016/04/12 13:39:21 What you are updating here is the is_active flag a…
aluebs-webrtc 2016/04/12 18:34:28 Yes, your understanding is completely right. And I…
+ const float* clear_psd = clear_power_estimator_.power().data();
+ const float* noise_psd = noise_power_estimator_.power().data();
+ const float clear_power =
+ std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
+ const float noise_power =
+ std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
+ snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power / noise_power;
peah-webrtc 2016/04/12 13:39:21 This SNR estimate is an average of the instantaneo…
peah-webrtc 2016/04/12 13:39:21 This SNR estimate is assuming that the ratio of th…
aluebs-webrtc 2016/04/12 18:34:28 That is an interesting point. Because the PSDs are…
aluebs-webrtc 2016/04/12 18:34:28 As discussed offline at the beginning of this proj…
+ if (is_active_) {
+ if (snr_ > kMaxActiveSNR) {
+ is_active_ = false;
+ // Set the target gains to unity.
+ float* gains = gain_applier_.target();
+ for (size_t i = 0; i < freqs_; ++i) {
+ gains[i] = 1.f;
+ }
+ }
+ } else {
+ is_active_ = snr_ < kMinInactiveSNR;
+ }
+}
+
void IntelligibilityEnhancer::SolveForLambda(float power_target) {
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
const int kMaxIters = 100; // for these, based on experiments.
« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698