Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1394)

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1672343002: Using the NS noise estimate for the IE (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ns
Patch Set: Rebasing Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
index fe964aba8c3eb2a795f30db17cc49d93c2f788fa..3f5254550494338252b70f3a415c24891eae5443 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -39,6 +39,26 @@ const float kKbdAlpha = 1.5f;
const float kLambdaBot = -1.0f; // Extreme values in bisection
const float kLambdaTop = -10e-18f; // search for lambda (note: -10e-18f is -1e-17f).
+// Returns the dot product of |a| and |b|, reading |length| floats from each; returns 0.f when |length| is 0.
+float DotProduct(const float* a, const float* b, size_t length) {
+ float ret = 0.f;  // Running sum; also the result for an empty range.
+ for (size_t i = 0; i < length; ++i) {
+ ret = fmaf(a[i], b[i], ret);  // Fused multiply-add: ret += a[i] * b[i] with a single rounding.
+ }
+ return ret;
+}
+
+// Computes the power across ERB filters from the power spectral density |var|: one dot product of |var| with each row of |filter_bank|.
+// Stores it in |result|, which must hold filter_bank.size() floats; |var| must hold at least filter_bank[i].size() floats for every row i.
+void FilterVariance(const float* var,
+ const std::vector<std::vector<float>>& filter_bank,
+ float* result) {
+ for (size_t i = 0; i < filter_bank.size(); ++i) {
+ RTC_DCHECK_GT(filter_bank[i].size(), 0u);  // Row must be non-empty before &filter_bank[i][0] is taken below.
+ result[i] = DotProduct(&filter_bank[i][0], var, filter_bank[i].size());  // Weighted sum of |var| for filter i.
+ }
+}
+
} // namespace
using std::complex;
@@ -47,9 +67,8 @@ using std::min;
using VarianceType = intelligibility::VarianceArray::StepType;
IntelligibilityEnhancer::TransformCallback::TransformCallback(
- IntelligibilityEnhancer* parent,
- IntelligibilityEnhancer::AudioSource source)
- : parent_(parent), source_(source) {
+ IntelligibilityEnhancer* parent)
+ : parent_(parent) {
}
void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
@@ -60,7 +79,7 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
complex<float>* const* out_block) {
RTC_DCHECK_EQ(parent_->freqs_, frames);
for (size_t i = 0; i < in_channels; ++i) {
- parent_->DispatchAudio(source_, in_block[i], out_block[i]);
+ parent_->ProcessClearBlock(in_block[i], out_block[i]);
}
}
@@ -85,27 +104,26 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
config.var_type,
config.var_window_size,
config.var_decay_rate),
- noise_variance_(freqs_,
- config.var_type,
- config.var_window_size,
- config.var_decay_rate),
filtered_clear_var_(new float[bank_size_]),
filtered_noise_var_(new float[bank_size_]),
- filter_bank_(bank_size_),
center_freqs_(new float[bank_size_]),
+ render_filter_bank_(CreateErbBank(freqs_)),
rho_(new float[bank_size_]),
gains_eq_(new float[bank_size_]),
gain_applier_(freqs_, config.gain_change_limit),
temp_render_out_buffer_(chunk_length_, num_render_channels_),
- temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
kbd_window_(new float[window_size_]),
- render_callback_(this, AudioSource::kRenderStream),
- capture_callback_(this, AudioSource::kCaptureStream),
+ render_callback_(this),
block_count_(0),
analysis_step_(0) {
RTC_DCHECK_LE(config.rho, 1.0f);
- CreateErbBank();
+ memset(filtered_clear_var_.get(),
+ 0,
+ bank_size_ * sizeof(filtered_clear_var_[0]));
+ memset(filtered_noise_var_.get(),
+ 0,
+ bank_size_ * sizeof(filtered_noise_var_[0]));
// Assumes all rho equal.
for (size_t i = 0; i < bank_size_; ++i) {
@@ -122,9 +140,20 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
render_mangler_.reset(new LappedTransform(
num_render_channels_, num_render_channels_, chunk_length_,
kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
- capture_mangler_.reset(new LappedTransform(
- num_capture_channels_, num_capture_channels_, chunk_length_,
- kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
+}
+
+void IntelligibilityEnhancer::SetCaptureNoiseEstimate(  // Filters a per-bin noise estimate through the capture filter bank into filtered_noise_var_.
+ std::vector<float> noise) {  // Taken by value: the squaring below modifies only this local copy.
+ if (capture_filter_bank_.size() != bank_size_ ||  // Rebuild the bank if its filter count or per-filter width no longer matches.
+ capture_filter_bank_[0].size() != noise.size()) {  // NOTE(review): indexes [0] whenever size() == bank_size_; out of bounds if bank_size_ could be 0 - confirm.
+ capture_filter_bank_ = CreateErbBank(noise.size());
+ }
+ for (size_t i = 0; i < noise.size(); ++i) {
+ noise[i] *= noise[i];  // Square each element in place; presumably magnitude -> power - TODO confirm against caller.
+ }
+ FilterVariance(&noise[0],  // NOTE(review): &noise[0] is invalid if |noise| is empty - confirm callers never pass an empty vector.
turaj 2016/02/09 16:40:33 Bastiaan's idea is one does not need to update IE
aluebs-webrtc 2016/02/09 19:13:35 Good point! I moved the FilterVariance to where it
+ capture_filter_bank_,
+ filtered_noise_var_.get());
}
void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
@@ -145,29 +174,6 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
}
}
-void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio,
- int sample_rate_hz,
- size_t num_channels) {
- RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
- RTC_CHECK_EQ(num_capture_channels_, num_channels);
-
- capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels());
-}
-
-void IntelligibilityEnhancer::DispatchAudio(
- IntelligibilityEnhancer::AudioSource source,
- const complex<float>* in_block,
- complex<float>* out_block) {
- switch (source) {
- case kRenderStream:
- ProcessClearBlock(in_block, out_block);
- break;
- case kCaptureStream:
- ProcessNoiseBlock(in_block, out_block);
- break;
- }
-}
-
void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
complex<float>* out_block) {
if (block_count_ < 2) {
@@ -194,9 +200,9 @@ void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
}
void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
- FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());
- FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());
-
+ FilterVariance(clear_variance_.variance(),
+ render_filter_bank_,
+ filtered_clear_var_.get());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
const float power_top =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
@@ -242,16 +248,11 @@ void IntelligibilityEnhancer::UpdateErbGains() {
for (size_t i = 0; i < freqs_; ++i) {
gains[i] = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
- gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
+ gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]);
}
}
}
-void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
- complex<float>* /*out_block*/) {
- noise_variance_.Step(in_block);
-}
-
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
size_t erb_resolution) {
float freq_limit = sample_rate / 2000.0f;
@@ -260,7 +261,9 @@ size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
return erb_scale * erb_resolution;
}
-void IntelligibilityEnhancer::CreateErbBank() {
+std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
+ size_t num_freqs) {
+ std::vector<std::vector<float>> filter_bank(bank_size_);
size_t lf = 1, rf = 4;
for (size_t i = 0; i < bank_size_; ++i) {
@@ -274,58 +277,60 @@ void IntelligibilityEnhancer::CreateErbBank() {
}
for (size_t i = 0; i < bank_size_; ++i) {
- filter_bank_[i].resize(freqs_);
+ filter_bank[i].resize(num_freqs);
}
for (size_t i = 1; i <= bank_size_; ++i) {
size_t lll, ll, rr, rrr;
static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
lll = static_cast<size_t>(round(
- center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
+ center_freqs_[max(kOne, i - lf) - 1] * num_freqs /
(0.5f * sample_rate_hz_)));
ll = static_cast<size_t>(round(
- center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
- lll = min(freqs_, max(lll, kOne)) - 1;
- ll = min(freqs_, max(ll, kOne)) - 1;
+ center_freqs_[max(kOne, i) - 1] * num_freqs /
+ (0.5f * sample_rate_hz_)));
+ lll = min(num_freqs, max(lll, kOne)) - 1;
+ ll = min(num_freqs, max(ll, kOne)) - 1;
rrr = static_cast<size_t>(round(
- center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
+ center_freqs_[min(bank_size_, i + rf) - 1] * num_freqs /
(0.5f * sample_rate_hz_)));
rr = static_cast<size_t>(round(
- center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
+ center_freqs_[min(bank_size_, i + 1) - 1] * num_freqs /
(0.5f * sample_rate_hz_)));
- rrr = min(freqs_, max(rrr, kOne)) - 1;
- rr = min(freqs_, max(rr, kOne)) - 1;
+ rrr = min(num_freqs, max(rrr, kOne)) - 1;
+ rr = min(num_freqs, max(rr, kOne)) - 1;
float step, element;
step = 1.0f / (ll - lll);
element = 0.0f;
for (size_t j = lll; j <= ll; ++j) {
- filter_bank_[i - 1][j] = element;
+ filter_bank[i - 1][j] = element;
element += step;
}
step = 1.0f / (rrr - rr);
element = 1.0f;
for (size_t j = rr; j <= rrr; ++j) {
- filter_bank_[i - 1][j] = element;
+ filter_bank[i - 1][j] = element;
element -= step;
}
for (size_t j = ll; j <= rr; ++j) {
- filter_bank_[i - 1][j] = 1.0f;
+ filter_bank[i - 1][j] = 1.0f;
}
}
float sum;
- for (size_t i = 0; i < freqs_; ++i) {
+ for (size_t i = 0; i < num_freqs; ++i) {
sum = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
- sum += filter_bank_[j][i];
+ sum += filter_bank[j][i];
}
for (size_t j = 0; j < bank_size_; ++j) {
- filter_bank_[j][i] /= sum;
+ filter_bank[j][i] /= sum;
}
}
+ return filter_bank;
}
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
@@ -356,24 +361,6 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
}
}
-void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
- RTC_DCHECK_GT(freqs_, 0u);
- for (size_t i = 0; i < bank_size_; ++i) {
- result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
- }
-}
-
-float IntelligibilityEnhancer::DotProduct(const float* a,
- const float* b,
- size_t length) {
- float ret = 0.0f;
-
- for (size_t i = 0; i < length; ++i) {
- ret = fmaf(a[i], b[i], ret);
- }
- return ret;
-}
-
bool IntelligibilityEnhancer::active() const {
return active_;
}

Powered by Google App Engine
This is Rietveld 408576698