Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
index fe964aba8c3eb2a795f30db17cc49d93c2f788fa..3f5254550494338252b70f3a415c24891eae5443 100644 |
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc |
@@ -39,6 +39,26 @@ const float kKbdAlpha = 1.5f; |
const float kLambdaBot = -1.0f; // Extreme values in bisection |
const float kLambdaTop = -10e-18f; // search for lamda. |
+// Returns dot product of vectors |a| and |b| with size |length|. |
+float DotProduct(const float* a, const float* b, size_t length) { |
+ float ret = 0.f; |
+ for (size_t i = 0; i < length; ++i) { |
+ ret = fmaf(a[i], b[i], ret); |
+ } |
+ return ret; |
+} |
+ |
+// Computes the power across ERB filters from the power spectral density |var|. |
+// Stores it in |result|. |
+void FilterVariance(const float* var, |
+ const std::vector<std::vector<float>>& filter_bank, |
+ float* result) { |
+ for (size_t i = 0; i < filter_bank.size(); ++i) { |
+ RTC_DCHECK_GT(filter_bank[i].size(), 0u); |
+ result[i] = DotProduct(&filter_bank[i][0], var, filter_bank[i].size()); |
+ } |
+} |
+ |
} // namespace |
using std::complex; |
@@ -47,9 +67,8 @@ using std::min; |
using VarianceType = intelligibility::VarianceArray::StepType; |
IntelligibilityEnhancer::TransformCallback::TransformCallback( |
- IntelligibilityEnhancer* parent, |
- IntelligibilityEnhancer::AudioSource source) |
- : parent_(parent), source_(source) { |
+ IntelligibilityEnhancer* parent) |
+ : parent_(parent) { |
} |
void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
@@ -60,7 +79,7 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( |
complex<float>* const* out_block) { |
RTC_DCHECK_EQ(parent_->freqs_, frames); |
for (size_t i = 0; i < in_channels; ++i) { |
- parent_->DispatchAudio(source_, in_block[i], out_block[i]); |
+ parent_->ProcessClearBlock(in_block[i], out_block[i]); |
} |
} |
@@ -85,27 +104,26 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) |
config.var_type, |
config.var_window_size, |
config.var_decay_rate), |
- noise_variance_(freqs_, |
- config.var_type, |
- config.var_window_size, |
- config.var_decay_rate), |
filtered_clear_var_(new float[bank_size_]), |
filtered_noise_var_(new float[bank_size_]), |
- filter_bank_(bank_size_), |
center_freqs_(new float[bank_size_]), |
+ render_filter_bank_(CreateErbBank(freqs_)), |
rho_(new float[bank_size_]), |
gains_eq_(new float[bank_size_]), |
gain_applier_(freqs_, config.gain_change_limit), |
temp_render_out_buffer_(chunk_length_, num_render_channels_), |
- temp_capture_out_buffer_(chunk_length_, num_capture_channels_), |
kbd_window_(new float[window_size_]), |
- render_callback_(this, AudioSource::kRenderStream), |
- capture_callback_(this, AudioSource::kCaptureStream), |
+ render_callback_(this), |
block_count_(0), |
analysis_step_(0) { |
RTC_DCHECK_LE(config.rho, 1.0f); |
- CreateErbBank(); |
+ memset(filtered_clear_var_.get(), |
+ 0, |
+ bank_size_ * sizeof(filtered_clear_var_[0])); |
+ memset(filtered_noise_var_.get(), |
+ 0, |
+ bank_size_ * sizeof(filtered_noise_var_[0])); |
// Assumes all rho equal. |
for (size_t i = 0; i < bank_size_; ++i) { |
@@ -122,9 +140,20 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) |
render_mangler_.reset(new LappedTransform( |
num_render_channels_, num_render_channels_, chunk_length_, |
kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); |
- capture_mangler_.reset(new LappedTransform( |
- num_capture_channels_, num_capture_channels_, chunk_length_, |
- kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); |
+} |
+ |
+void IntelligibilityEnhancer::SetCaptureNoiseEstimate( |
+ std::vector<float> noise) { |
+ if (capture_filter_bank_.size() != bank_size_ || |
+ capture_filter_bank_[0].size() != noise.size()) { |
+ capture_filter_bank_ = CreateErbBank(noise.size()); |
+ } |
+ for (size_t i = 0; i < noise.size(); ++i) { |
+ noise[i] *= noise[i]; |
+ } |
+ FilterVariance(&noise[0], |
turaj
2016/02/09 16:40:33
Bastiaan's idea is one does not need to update IE
aluebs-webrtc
2016/02/09 19:13:35
Good point! I moved the FilterVariance to where it
|
+ capture_filter_bank_, |
+ filtered_noise_var_.get()); |
} |
void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
@@ -145,29 +174,6 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, |
} |
} |
-void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, |
- int sample_rate_hz, |
- size_t num_channels) { |
- RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); |
- RTC_CHECK_EQ(num_capture_channels_, num_channels); |
- |
- capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels()); |
-} |
- |
-void IntelligibilityEnhancer::DispatchAudio( |
- IntelligibilityEnhancer::AudioSource source, |
- const complex<float>* in_block, |
- complex<float>* out_block) { |
- switch (source) { |
- case kRenderStream: |
- ProcessClearBlock(in_block, out_block); |
- break; |
- case kCaptureStream: |
- ProcessNoiseBlock(in_block, out_block); |
- break; |
- } |
-} |
- |
void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, |
complex<float>* out_block) { |
if (block_count_ < 2) { |
@@ -194,9 +200,9 @@ void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block, |
} |
void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { |
- FilterVariance(clear_variance_.variance(), filtered_clear_var_.get()); |
- FilterVariance(noise_variance_.variance(), filtered_noise_var_.get()); |
- |
+ FilterVariance(clear_variance_.variance(), |
+ render_filter_bank_, |
+ filtered_clear_var_.get()); |
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); |
const float power_top = |
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); |
@@ -242,16 +248,11 @@ void IntelligibilityEnhancer::UpdateErbGains() { |
for (size_t i = 0; i < freqs_; ++i) { |
gains[i] = 0.0f; |
for (size_t j = 0; j < bank_size_; ++j) { |
- gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); |
+ gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]); |
} |
} |
} |
-void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block, |
- complex<float>* /*out_block*/) { |
- noise_variance_.Step(in_block); |
-} |
- |
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
size_t erb_resolution) { |
float freq_limit = sample_rate / 2000.0f; |
@@ -260,7 +261,9 @@ size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, |
return erb_scale * erb_resolution; |
} |
-void IntelligibilityEnhancer::CreateErbBank() { |
+std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank( |
+ size_t num_freqs) { |
+ std::vector<std::vector<float>> filter_bank(bank_size_); |
size_t lf = 1, rf = 4; |
for (size_t i = 0; i < bank_size_; ++i) { |
@@ -274,58 +277,60 @@ void IntelligibilityEnhancer::CreateErbBank() { |
} |
for (size_t i = 0; i < bank_size_; ++i) { |
- filter_bank_[i].resize(freqs_); |
+ filter_bank[i].resize(num_freqs); |
} |
for (size_t i = 1; i <= bank_size_; ++i) { |
size_t lll, ll, rr, rrr; |
static const size_t kOne = 1; // Avoids repeated static_cast<>s below. |
lll = static_cast<size_t>(round( |
- center_freqs_[max(kOne, i - lf) - 1] * freqs_ / |
+ center_freqs_[max(kOne, i - lf) - 1] * num_freqs / |
(0.5f * sample_rate_hz_))); |
ll = static_cast<size_t>(round( |
- center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_))); |
- lll = min(freqs_, max(lll, kOne)) - 1; |
- ll = min(freqs_, max(ll, kOne)) - 1; |
+ center_freqs_[max(kOne, i) - 1] * num_freqs / |
+ (0.5f * sample_rate_hz_))); |
+ lll = min(num_freqs, max(lll, kOne)) - 1; |
+ ll = min(num_freqs, max(ll, kOne)) - 1; |
rrr = static_cast<size_t>(round( |
- center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ / |
+ center_freqs_[min(bank_size_, i + rf) - 1] * num_freqs / |
(0.5f * sample_rate_hz_))); |
rr = static_cast<size_t>(round( |
- center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ / |
+ center_freqs_[min(bank_size_, i + 1) - 1] * num_freqs / |
(0.5f * sample_rate_hz_))); |
- rrr = min(freqs_, max(rrr, kOne)) - 1; |
- rr = min(freqs_, max(rr, kOne)) - 1; |
+ rrr = min(num_freqs, max(rrr, kOne)) - 1; |
+ rr = min(num_freqs, max(rr, kOne)) - 1; |
float step, element; |
step = 1.0f / (ll - lll); |
element = 0.0f; |
for (size_t j = lll; j <= ll; ++j) { |
- filter_bank_[i - 1][j] = element; |
+ filter_bank[i - 1][j] = element; |
element += step; |
} |
step = 1.0f / (rrr - rr); |
element = 1.0f; |
for (size_t j = rr; j <= rrr; ++j) { |
- filter_bank_[i - 1][j] = element; |
+ filter_bank[i - 1][j] = element; |
element -= step; |
} |
for (size_t j = ll; j <= rr; ++j) { |
- filter_bank_[i - 1][j] = 1.0f; |
+ filter_bank[i - 1][j] = 1.0f; |
} |
} |
float sum; |
- for (size_t i = 0; i < freqs_; ++i) { |
+ for (size_t i = 0; i < num_freqs; ++i) { |
sum = 0.0f; |
for (size_t j = 0; j < bank_size_; ++j) { |
- sum += filter_bank_[j][i]; |
+ sum += filter_bank[j][i]; |
} |
for (size_t j = 0; j < bank_size_; ++j) { |
- filter_bank_[j][i] /= sum; |
+ filter_bank[j][i] /= sum; |
} |
} |
+ return filter_bank; |
} |
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
@@ -356,24 +361,6 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, |
} |
} |
-void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) { |
- RTC_DCHECK_GT(freqs_, 0u); |
- for (size_t i = 0; i < bank_size_; ++i) { |
- result[i] = DotProduct(&filter_bank_[i][0], var, freqs_); |
- } |
-} |
- |
-float IntelligibilityEnhancer::DotProduct(const float* a, |
- const float* b, |
- size_t length) { |
- float ret = 0.0f; |
- |
- for (size_t i = 0; i < length; ++i) { |
- ret = fmaf(a[i], b[i], ret); |
- } |
- return ret; |
-} |
- |
bool IntelligibilityEnhancer::active() const { |
return active_; |
} |