Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 const float kTestNonZeroVarLambdaTop[] = { 195 const float kTestNonZeroVarLambdaTop[] = {
196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; 198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
199 static_assert(arraysize(kTestCenterFreqs) == 199 static_assert(arraysize(kTestCenterFreqs) ==
200 arraysize(kTestNonZeroVarLambdaTop), 200 arraysize(kTestNonZeroVarLambdaTop),
201 "Power test data badly initialized."); 201 "Power test data badly initialized.");
202 const float kMaxTestError = 0.005f; 202 const float kMaxTestError = 0.005f;
203 203
204 // Enhancer initialization parameters. 204 // Enhancer initialization parameters.
205 const int kSamples = 1000; 205 const int kSamples = 10000;
206 const int kSampleRate = 4000; 206 const int kSampleRate = 4000;
207 const int kNumChannels = 1; 207 const int kNumChannels = 1;
208 const int kFragmentSize = kSampleRate / 100; 208 const int kFragmentSize = kSampleRate / 100;
209 const size_t kNumNoiseBins = 129; 209 const size_t kNumNoiseBins = 129;
210 210
211 // Number of frames to process in the bitexactness tests. 211 // Number of frames to process in the bitexactness tests.
212 const size_t kNumFramesToProcess = 1000; 212 const size_t kNumFramesToProcess = 1000;
213 213
214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { 214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz 215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz
216 ? AudioProcessing::kSampleRate16kHz 216 ? AudioProcessing::kSampleRate16kHz
217 : sample_rate_hz); 217 : sample_rate_hz);
218 } 218 }
219 219
220 // Process one frame of data and produce the output. 220 // Process one frame of data and produce the output.
221 void ProcessOneFrame(int sample_rate_hz, 221 void ProcessOneFrame(int sample_rate_hz,
222 AudioBuffer* render_audio_buffer, 222 AudioBuffer* render_audio_buffer,
223 AudioBuffer* capture_audio_buffer, 223 AudioBuffer* capture_audio_buffer,
224 NoiseSuppressionImpl* noise_suppressor, 224 NoiseSuppressionImpl* noise_suppressor,
225 IntelligibilityEnhancer* intelligibility_enhancer) { 225 IntelligibilityEnhancer* intelligibility_enhancer) {
226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { 226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
227 render_audio_buffer->SplitIntoFrequencyBands(); 227 render_audio_buffer->SplitIntoFrequencyBands();
228 capture_audio_buffer->SplitIntoFrequencyBands(); 228 capture_audio_buffer->SplitIntoFrequencyBands();
229 } 229 }
230 230
231 intelligibility_enhancer->ProcessRenderAudio( 231 intelligibility_enhancer->ProcessRenderAudio(
232 render_audio_buffer->split_channels_f(kBand0To8kHz), 232 render_audio_buffer, IntelligibilityEnhancerSampleRate(sample_rate_hz));
233 IntelligibilityEnhancerSampleRate(sample_rate_hz),
234 render_audio_buffer->num_channels());
235 233
236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); 234 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); 235 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
238 236
239 intelligibility_enhancer->SetCaptureNoiseEstimate( 237 intelligibility_enhancer->SetCaptureNoiseEstimate(
240 noise_suppressor->NoiseEstimate(), 0); 238 noise_suppressor->NoiseEstimate(), 0);
241 239
242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { 240 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
243 render_audio_buffer->MergeFrequencyBands(); 241 render_audio_buffer->MergeFrequencyBands();
244 } 242 }
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
313 311
314 float float_rand() { 312 float float_rand() {
315 return std::rand() * 2.f / RAND_MAX - 1; 313 return std::rand() * 2.f / RAND_MAX - 1;
316 } 314 }
317 315
318 } // namespace 316 } // namespace
319 317
320 class IntelligibilityEnhancerTest : public ::testing::Test { 318 class IntelligibilityEnhancerTest : public ::testing::Test {
321 protected: 319 protected:
322 IntelligibilityEnhancerTest() 320 IntelligibilityEnhancerTest()
323 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { 321 : clear_buffer_(kFragmentSize,
322 kNumChannels,
323 kFragmentSize,
324 kNumChannels,
325 kFragmentSize),
326 stream_config_(kSampleRate, kNumChannels),
327 clear_data_(kSamples),
328 noise_data_(kNumNoiseBins),
329 orig_data_(kSamples) {
324 std::srand(1); 330 std::srand(1);
325 enh_.reset( 331 enh_.reset(
326 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); 332 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
327 } 333 }
328 334
329 bool CheckUpdate() { 335 bool CheckUpdate() {
330 enh_.reset( 336 enh_.reset(
331 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); 337 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
332 float* clear_cursor = clear_data_.data(); 338 float* clear_cursor = clear_data_.data();
333 float* noise_cursor = noise_data_.data();
334 for (int i = 0; i < kSamples; i += kFragmentSize) { 339 for (int i = 0; i < kSamples; i += kFragmentSize) {
335 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); 340 enh_->SetCaptureNoiseEstimate(noise_data_, 1);
341 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
342 enh_->ProcessRenderAudio(&clear_buffer_, kSampleRate);
343 clear_buffer_.CopyTo(stream_config_, &clear_cursor);
336 clear_cursor += kFragmentSize; 344 clear_cursor += kFragmentSize;
337 noise_cursor += kFragmentSize;
338 } 345 }
339 for (int i = 0; i < kSamples; i++) { 346 for (int i = 0; i < kSamples; i++) {
340 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) { 347 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {
341 return true; 348 return true;
342 } 349 }
343 } 350 }
344 return false; 351 return false;
345 } 352 }
346 353
347 std::unique_ptr<IntelligibilityEnhancer> enh_; 354 std::unique_ptr<IntelligibilityEnhancer> enh_;
355 AudioBuffer clear_buffer_;
356 StreamConfig stream_config_;
348 std::vector<float> clear_data_; 357 std::vector<float> clear_data_;
349 std::vector<float> noise_data_; 358 std::vector<float> noise_data_;
350 std::vector<float> orig_data_; 359 std::vector<float> orig_data_;
351 }; 360 };
352 361
353 // For each class of generated data, tests that render stream is updated when 362 // For each class of generated data, tests that render stream is updated when
354 // it should be. 363 // it should be.
355 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { 364 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
356 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); 365 std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
357 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); 366 std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
358 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); 367 std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
359 EXPECT_FALSE(CheckUpdate()); 368 EXPECT_FALSE(CheckUpdate());
360 std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
361 EXPECT_FALSE(CheckUpdate());
362 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); 369 std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
363 orig_data_ = clear_data_; 370 orig_data_ = clear_data_;
371 EXPECT_FALSE(CheckUpdate());
372 std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
373 FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
364 EXPECT_TRUE(CheckUpdate()); 374 EXPECT_TRUE(CheckUpdate());
365 } 375 }
366 376
367 // Tests ERB bank creation, comparing against matlab output. 377 // Tests ERB bank creation, comparing against matlab output.
368 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { 378 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
369 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); 379 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
370 for (size_t i = 0; i < enh_->bank_size_; ++i) { 380 for (size_t i = 0; i < enh_->bank_size_; ++i) {
371 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); 381 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
372 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); 382 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
373 for (size_t j = 0; j < enh_->freqs_; ++j) { 383 for (size_t j = 0; j < enh_->freqs_; ++j) {
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
411 const float kTolerance = 0.007f; 421 const float kTolerance = 0.007f;
412 std::vector<float> noise(kNumNoiseBins); 422 std::vector<float> noise(kNumNoiseBins);
413 std::vector<float> noise_psd(kNumNoiseBins); 423 std::vector<float> noise_psd(kNumNoiseBins);
414 std::generate(noise.begin(), noise.end(), float_rand); 424 std::generate(noise.begin(), noise.end(), float_rand);
415 for (size_t i = 0; i < kNumNoiseBins; ++i) { 425 for (size_t i = 0; i < kNumNoiseBins; ++i) {
416 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; 426 noise_psd[i] = kGain * kGain * noise[i] * noise[i];
417 } 427 }
418 float* clear_cursor = clear_data_.data(); 428 float* clear_cursor = clear_data_.data();
419 for (size_t i = 0; i < kNumFramesToProcess; ++i) { 429 for (size_t i = 0; i < kNumFramesToProcess; ++i) {
420 enh_->SetCaptureNoiseEstimate(noise, kGain); 430 enh_->SetCaptureNoiseEstimate(noise, kGain);
421 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); 431 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
432 enh_->ProcessRenderAudio(&clear_buffer_, kSampleRate);
422 } 433 }
423 const std::vector<float>& estimated_psd = 434 const std::vector<float>& estimated_psd =
424 enh_->noise_power_estimator_.power(); 435 enh_->noise_power_estimator_.power();
425 for (size_t i = 0; i < kNumNoiseBins; ++i) { 436 for (size_t i = 0; i < kNumNoiseBins; ++i) {
426 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], 437 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],
427 kTolerance); 438 kTolerance);
428 } 439 }
429 } 440 }
430 441
431 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { 442 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 } 485 }
475 486
476 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { 487 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
477 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, 488 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
478 -0.012975f, -0.015940f, -0.017820f}; 489 -0.012975f, -0.015940f, -0.017820f};
479 490
480 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); 491 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
481 } 492 }
482 493
483 } // namespace webrtc 494 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698