Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(723)

Unified Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)
Patch Set: Improve comment (created 4 years, 3 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
index 45f338c642540acaabf8fb96652b55c28f51cce0..0c56870976cefff579cc17b4c54edbaea30f9785 100644
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
@@ -202,11 +202,12 @@ static_assert(arraysize(kTestCenterFreqs) ==
const float kMaxTestError = 0.005f;
// Enhancer initialization parameters.
-const int kSamples = 1000;
+const int kSamples = 10000;
const int kSampleRate = 4000;
const int kNumChannels = 1;
const int kFragmentSize = kSampleRate / 100;
const size_t kNumNoiseBins = 129;
+const size_t kNumBands = 1;
// Number of frames to process in the bitexactness tests.
const size_t kNumFramesToProcess = 1000;
@@ -228,10 +229,7 @@ void ProcessOneFrame(int sample_rate_hz,
capture_audio_buffer->SplitIntoFrequencyBands();
}
- intelligibility_enhancer->ProcessRenderAudio(
- render_audio_buffer->split_channels_f(kBand0To8kHz),
- IntelligibilityEnhancerSampleRate(sample_rate_hz),
- render_audio_buffer->num_channels());
+ intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
@@ -276,7 +274,8 @@ void RunBitexactnessTest(int sample_rate_hz,
IntelligibilityEnhancer intelligibility_enhancer(
IntelligibilityEnhancerSampleRate(sample_rate_hz),
- render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins());
+ render_config.num_channels(), kNumBands,
+ NoiseSuppressionImpl::num_noise_bins());
for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
@@ -320,24 +319,34 @@ float float_rand() {
class IntelligibilityEnhancerTest : public ::testing::Test {
protected:
IntelligibilityEnhancerTest()
- : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
+ : clear_buffer_(kFragmentSize,
+ kNumChannels,
+ kFragmentSize,
+ kNumChannels,
+ kFragmentSize),
+ stream_config_(kSampleRate, kNumChannels),
+ clear_data_(kSamples),
+ noise_data_(kNumNoiseBins),
+ orig_data_(kSamples) {
std::srand(1);
- enh_.reset(
- new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
+ enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+ kNumNoiseBins));
}
bool CheckUpdate() {
- enh_.reset(
- new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
+ enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+ kNumNoiseBins));
float* clear_cursor = clear_data_.data();
- float* noise_cursor = noise_data_.data();
for (int i = 0; i < kSamples; i += kFragmentSize) {
- enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);
+ enh_->SetCaptureNoiseEstimate(noise_data_, 1);
+ clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+ enh_->ProcessRenderAudio(&clear_buffer_);
+ clear_buffer_.CopyTo(stream_config_, &clear_cursor);
clear_cursor += kFragmentSize;
- noise_cursor += kFragmentSize;
}
- for (int i = 0; i < kSamples; i++) {
- if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {
+ for (int i = initial_delay_; i < kSamples; i++) {
+ if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
+ kMaxTestError) {
return true;
}
}
@@ -345,22 +354,30 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
}
std::unique_ptr<IntelligibilityEnhancer> enh_;
+ // Render clean speech buffer.
+ AudioBuffer clear_buffer_;
+ StreamConfig stream_config_;
std::vector<float> clear_data_;
std::vector<float> noise_data_;
std::vector<float> orig_data_;
+ size_t initial_delay_;
};
// For each class of generated data, tests that render stream is updated when
// it should be.
TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
+ initial_delay_ = enh_->render_mangler_->initial_delay();
std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
EXPECT_FALSE(CheckUpdate());
- std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
+ std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
+ orig_data_ = clear_data_;
EXPECT_FALSE(CheckUpdate());
std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
orig_data_ = clear_data_;
+ std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
+ FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
EXPECT_TRUE(CheckUpdate());
}
@@ -418,7 +435,8 @@ TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
float* clear_cursor = clear_data_.data();
for (size_t i = 0; i < kNumFramesToProcess; ++i) {
enh_->SetCaptureNoiseEstimate(noise, kGain);
- enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);
+ clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+ enh_->ProcessRenderAudio(&clear_buffer_);
}
const std::vector<float>& estimated_psd =
enh_->noise_power_estimator_.power();
@@ -428,6 +446,41 @@ TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
}
}
+TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {
+ const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
+ const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
+ const size_t kTestNumBands =
+ rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
+ const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);
+ const size_t kTestSplitFragmentSize =
+ rtc::CheckedDivExact(kTestSplitRate, 100);
+ enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,
+ kTestNumBands, kNumNoiseBins));
+ size_t initial_delay = enh_->render_mangler_->initial_delay();
+ std::vector<float> rand_gen_buf(kTestFragmentSize);
+ AudioBuffer original_buffer(kTestFragmentSize, kNumChannels,
+ kTestFragmentSize, kNumChannels,
+ kTestFragmentSize);
+ AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize,
+ kNumChannels, kTestFragmentSize);
+ for (size_t i = 0u; i < kTestNumBands; ++i) {
+ std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand);
+ original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
+ rand_gen_buf.size());
+ audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
+ rand_gen_buf.size());
+ }
+ enh_->ProcessRenderAudio(&audio_buffer);
+ for (size_t i = 0u; i < kTestNumBands; ++i) {
+ const float* original_ptr = original_buffer.split_bands_const_f(0)[i];
+ const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i];
+ for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) {
+ EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]),
+ kMaxTestError);
+ }
+ }
+}
+
TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};

Powered by Google App Engine
This is Rietveld 408576698