| Index: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
|
| diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
|
| index 45f338c642540acaabf8fb96652b55c28f51cce0..0c56870976cefff579cc17b4c54edbaea30f9785 100644
|
| --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
|
| +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
|
| @@ -202,11 +202,12 @@ static_assert(arraysize(kTestCenterFreqs) ==
|
| const float kMaxTestError = 0.005f;
|
|
|
| // Enhancer initialization parameters.
|
| -const int kSamples = 1000;
|
| +const int kSamples = 10000;  // Length of clear_data_ and orig_data_ in the test fixture.
|
| const int kSampleRate = 4000;
|
| const int kNumChannels = 1;
|
| const int kFragmentSize = kSampleRate / 100;
|
| const size_t kNumNoiseBins = 129;
|
| +const size_t kNumBands = 1;  // Band count handed to the IntelligibilityEnhancer constructor.
|
|
|
| // Number of frames to process in the bitexactness tests.
|
| const size_t kNumFramesToProcess = 1000;
|
| @@ -228,10 +229,7 @@ void ProcessOneFrame(int sample_rate_hz,
|
| capture_audio_buffer->SplitIntoFrequencyBands();
|
| }
|
|
|
| - intelligibility_enhancer->ProcessRenderAudio(
|
| - render_audio_buffer->split_channels_f(kBand0To8kHz),
|
| - IntelligibilityEnhancerSampleRate(sample_rate_hz),
|
| - render_audio_buffer->num_channels());
|
| + intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
|
|
|
| noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
|
| noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
|
| @@ -276,7 +274,8 @@ void RunBitexactnessTest(int sample_rate_hz,
|
|
|
| IntelligibilityEnhancer intelligibility_enhancer(
|
| IntelligibilityEnhancerSampleRate(sample_rate_hz),
|
| - render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins());
|
| + render_config.num_channels(), kNumBands,
|
| + NoiseSuppressionImpl::num_noise_bins());
|
|
|
| for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
|
| ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
|
| @@ -320,24 +319,34 @@ float float_rand() {
|
| class IntelligibilityEnhancerTest : public ::testing::Test {
|
| protected:
|
| IntelligibilityEnhancerTest()
|
| - : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
|
| + : clear_buffer_(kFragmentSize,  // presumably sized to hold exactly one fragment - confirm against AudioBuffer's constructor.
|
| + kNumChannels,
|
| + kFragmentSize,
|
| + kNumChannels,
|
| + kFragmentSize),
|
| + stream_config_(kSampleRate, kNumChannels),
|
| + clear_data_(kSamples),
|
| + noise_data_(kNumNoiseBins),  // Now sized by noise bins rather than by kSamples.
|
| + orig_data_(kSamples) {  // initial_delay_ is not in this list; TestRenderUpdate assigns it before use.
|
| std::srand(1);
|
| - enh_.reset(
|
| - new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
|
| + enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,  // kNumBands is the newly added third argument.
|
| + kNumNoiseBins));
|
| }
|
|
|
| bool CheckUpdate() {
|
| - enh_.reset(
|
| - new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
|
| + enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
|
| + kNumNoiseBins));
|
| float* clear_cursor = clear_data_.data();
|
| - float* noise_cursor = noise_data_.data();
|
| for (int i = 0; i < kSamples; i += kFragmentSize) {
|
| - enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);
|
| + enh_->SetCaptureNoiseEstimate(noise_data_, 1);
|
| + clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
|
| + enh_->ProcessRenderAudio(&clear_buffer_);
|
| + clear_buffer_.CopyTo(stream_config_, &clear_cursor);
|
| clear_cursor += kFragmentSize;
|
| - noise_cursor += kFragmentSize;
|
| }
|
| - for (int i = 0; i < kSamples; i++) {
|
| - if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {
|
| + for (int i = initial_delay_; i < kSamples; i++) {
|
| + if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
|
| + kMaxTestError) {
|
| return true;
|
| }
|
| }
|
| @@ -345,22 +354,30 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
|
| }
|
|
|
| std::unique_ptr<IntelligibilityEnhancer> enh_;
|
| + // Render clean speech buffer.
|
| + AudioBuffer clear_buffer_;
|
| + StreamConfig stream_config_;  // Passed to clear_buffer_.CopyFrom()/CopyTo() by the tests.
|
| std::vector<float> clear_data_;
|
| std::vector<float> noise_data_;
|
| std::vector<float> orig_data_;
|
| + size_t initial_delay_ = 0u;  // Offset CheckUpdate() applies against orig_data_; defaults to 0 so it is never read while indeterminate.
|
| };
|
|
|
| // For each class of generated data, tests that render stream is updated when
|
| // it should be.
|
| TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
|
| + initial_delay_ = enh_->render_mangler_->initial_delay();  // Offset CheckUpdate() applies when comparing output against orig_data_.
|
| std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
|
| std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
|
| std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
|
| EXPECT_FALSE(CheckUpdate());
|
| - std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
|
| + std::generate(clear_data_.begin(), clear_data_.end(), float_rand);  // Second case: random render data while noise_data_ is still all zeros.
|
| + orig_data_ = clear_data_;  // Keep an unprocessed copy; CheckUpdate() writes its output back into clear_data_.
|
| EXPECT_FALSE(CheckUpdate());
|
| std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
|
| orig_data_ = clear_data_;
|
| + std::generate(noise_data_.begin(), noise_data_.end(), float_rand);  // Third case: random noise as well, so a change is expected.
|
| + FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());  // In place: source and destination are the same array.
|
| EXPECT_TRUE(CheckUpdate());
|
| }
|
|
|
| @@ -418,7 +435,8 @@ TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
|
| float* clear_cursor = clear_data_.data();
|
| for (size_t i = 0; i < kNumFramesToProcess; ++i) {
|
| enh_->SetCaptureNoiseEstimate(noise, kGain);
|
| - enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);
|
| + clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
|
| + enh_->ProcessRenderAudio(&clear_buffer_);
|
| }
|
| const std::vector<float>& estimated_psd =
|
| enh_->noise_power_estimator_.power();
|
| @@ -428,6 +446,41 @@ TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
|
| }
|
| }
|
|
|
| +TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {  // Each band's output must equal its input shifted by initial_delay.
|
| + const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
|
| + const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
|
| + const size_t kTestNumBands =
|
| + rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
|
| + const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);  // Full-rate samples per fragment (rate / 100).
|
| + const size_t kTestSplitFragmentSize =
|
| + rtc::CheckedDivExact(kTestSplitRate, 100);  // Split-rate samples per fragment; bounds the comparison loop below.
|
| + enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,  // Replaces the fixture's default enhancer with a multi-band one.
|
| + kTestNumBands, kNumNoiseBins));
|
| + size_t initial_delay = enh_->render_mangler_->initial_delay();
|
| + std::vector<float> rand_gen_buf(kTestFragmentSize);
|
| + AudioBuffer original_buffer(kTestFragmentSize, kNumChannels,  // Reference copy; never passed to the enhancer.
|
| + kTestFragmentSize, kNumChannels,
|
| + kTestFragmentSize);
|
| + AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize,
|
| + kNumChannels, kTestFragmentSize);
|
| + for (size_t i = 0u; i < kTestNumBands; ++i) {  // NOTE(review): each pass hands the whole rand_gen_buf to both buffers, so earlier passes look overwritten - confirm the loop is needed.
|
| + std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand);
|
| + original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
|
| + rand_gen_buf.size());
|
| + audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),  // Same samples as original_buffer, so the two start out identical.
|
| + rand_gen_buf.size());
|
| + }
|
| + enh_->ProcessRenderAudio(&audio_buffer);  // Only audio_buffer is processed.
|
| + for (size_t i = 0u; i < kTestNumBands; ++i) {
|
| + const float* original_ptr = original_buffer.split_bands_const_f(0)[i];  // presumably channel 0, band i - confirm against AudioBuffer.
|
| + const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i];
|
| + for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) {  // NOTE(review): runs zero times if initial_delay >= kTestSplitFragmentSize, leaving the test vacuous - confirm.
|
| + EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]),
|
| + kMaxTestError);
|
| + }
|
| + }
|
| +}
|
| +
|
| TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
|
| const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};
|
|
|
|
|