| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 195 const float kTestNonZeroVarLambdaTop[] = { | 195 const float kTestNonZeroVarLambdaTop[] = { |
| 196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, | 196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, |
| 197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, | 197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, |
| 198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; | 198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; |
| 199 static_assert(arraysize(kTestCenterFreqs) == | 199 static_assert(arraysize(kTestCenterFreqs) == |
| 200 arraysize(kTestNonZeroVarLambdaTop), | 200 arraysize(kTestNonZeroVarLambdaTop), |
| 201 "Power test data badly initialized."); | 201 "Power test data badly initialized."); |
| 202 const float kMaxTestError = 0.005f; | 202 const float kMaxTestError = 0.005f; |
| 203 | 203 |
| 204 // Enhancer initialization parameters. | 204 // Enhancer initialization parameters. |
| 205 const int kSamples = 1000; | 205 const int kSamples = 10000; |
| 206 const int kSampleRate = 4000; | 206 const int kSampleRate = 4000; |
| 207 const int kNumChannels = 1; | 207 const int kNumChannels = 1; |
| 208 const int kFragmentSize = kSampleRate / 100; | 208 const int kFragmentSize = kSampleRate / 100; |
| 209 const size_t kNumNoiseBins = 129; | 209 const size_t kNumNoiseBins = 129; |
| 210 const size_t kNumBands = 1; |
| 210 | 211 |
| 211 // Number of frames to process in the bitexactness tests. | 212 // Number of frames to process in the bitexactness tests. |
| 212 const size_t kNumFramesToProcess = 1000; | 213 const size_t kNumFramesToProcess = 1000; |
| 213 | 214 |
| 214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { | 215 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { |
| 215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz | 216 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz |
| 216 ? AudioProcessing::kSampleRate16kHz | 217 ? AudioProcessing::kSampleRate16kHz |
| 217 : sample_rate_hz); | 218 : sample_rate_hz); |
| 218 } | 219 } |
| 219 | 220 |
| 220 // Process one frame of data and produce the output. | 221 // Process one frame of data and produce the output. |
| 221 void ProcessOneFrame(int sample_rate_hz, | 222 void ProcessOneFrame(int sample_rate_hz, |
| 222 AudioBuffer* render_audio_buffer, | 223 AudioBuffer* render_audio_buffer, |
| 223 AudioBuffer* capture_audio_buffer, | 224 AudioBuffer* capture_audio_buffer, |
| 224 NoiseSuppressionImpl* noise_suppressor, | 225 NoiseSuppressionImpl* noise_suppressor, |
| 225 IntelligibilityEnhancer* intelligibility_enhancer) { | 226 IntelligibilityEnhancer* intelligibility_enhancer) { |
| 226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | 227 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 227 render_audio_buffer->SplitIntoFrequencyBands(); | 228 render_audio_buffer->SplitIntoFrequencyBands(); |
| 228 capture_audio_buffer->SplitIntoFrequencyBands(); | 229 capture_audio_buffer->SplitIntoFrequencyBands(); |
| 229 } | 230 } |
| 230 | 231 |
| 231 intelligibility_enhancer->ProcessRenderAudio( | 232 intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer); |
| 232 render_audio_buffer->split_channels_f(kBand0To8kHz), | |
| 233 IntelligibilityEnhancerSampleRate(sample_rate_hz), | |
| 234 render_audio_buffer->num_channels()); | |
| 235 | 233 |
| 236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); | 234 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); |
| 237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); | 235 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); |
| 238 | 236 |
| 239 intelligibility_enhancer->SetCaptureNoiseEstimate( | 237 intelligibility_enhancer->SetCaptureNoiseEstimate( |
| 240 noise_suppressor->NoiseEstimate(), 0); | 238 noise_suppressor->NoiseEstimate(), 0); |
| 241 | 239 |
| 242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | 240 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 243 render_audio_buffer->MergeFrequencyBands(); | 241 render_audio_buffer->MergeFrequencyBands(); |
| 244 } | 242 } |
| (...skipping 24 matching lines...) Expand all Loading... |
| 269 std::vector<float> capture_input(render_buffer.num_frames() * | 267 std::vector<float> capture_input(render_buffer.num_frames() * |
| 270 capture_buffer.num_channels()); | 268 capture_buffer.num_channels()); |
| 271 | 269 |
| 272 rtc::CriticalSection crit_capture; | 270 rtc::CriticalSection crit_capture; |
| 273 NoiseSuppressionImpl noise_suppressor(&crit_capture); | 271 NoiseSuppressionImpl noise_suppressor(&crit_capture); |
| 274 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); | 272 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); |
| 275 noise_suppressor.Enable(true); | 273 noise_suppressor.Enable(true); |
| 276 | 274 |
| 277 IntelligibilityEnhancer intelligibility_enhancer( | 275 IntelligibilityEnhancer intelligibility_enhancer( |
| 278 IntelligibilityEnhancerSampleRate(sample_rate_hz), | 276 IntelligibilityEnhancerSampleRate(sample_rate_hz), |
| 279 render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins()); | 277 render_config.num_channels(), kNumBands, |
| 278 NoiseSuppressionImpl::num_noise_bins()); |
| 280 | 279 |
| 281 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { | 280 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { |
| 282 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), | 281 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), |
| 283 render_buffer.num_channels(), &render_file, | 282 render_buffer.num_channels(), &render_file, |
| 284 render_input); | 283 render_input); |
| 285 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), | 284 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), |
| 286 capture_buffer.num_channels(), &capture_file, | 285 capture_buffer.num_channels(), &capture_file, |
| 287 capture_input); | 286 capture_input); |
| 288 | 287 |
| 289 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); | 288 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 313 | 312 |
| 314 float float_rand() { | 313 float float_rand() { |
| 315 return std::rand() * 2.f / RAND_MAX - 1; | 314 return std::rand() * 2.f / RAND_MAX - 1; |
| 316 } | 315 } |
| 317 | 316 |
| 318 } // namespace | 317 } // namespace |
| 319 | 318 |
| 320 class IntelligibilityEnhancerTest : public ::testing::Test { | 319 class IntelligibilityEnhancerTest : public ::testing::Test { |
| 321 protected: | 320 protected: |
| 322 IntelligibilityEnhancerTest() | 321 IntelligibilityEnhancerTest() |
| 323 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { | 322 : clear_buffer_(kFragmentSize, |
| 323 kNumChannels, |
| 324 kFragmentSize, |
| 325 kNumChannels, |
| 326 kFragmentSize), |
| 327 stream_config_(kSampleRate, kNumChannels), |
| 328 clear_data_(kSamples), |
| 329 noise_data_(kNumNoiseBins), |
| 330 orig_data_(kSamples) { |
| 324 std::srand(1); | 331 std::srand(1); |
| 325 enh_.reset( | 332 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, |
| 326 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); | 333 kNumNoiseBins)); |
| 327 } | 334 } |
| 328 | 335 |
| 329 bool CheckUpdate() { | 336 bool CheckUpdate() { |
| 330 enh_.reset( | 337 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, |
| 331 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); | 338 kNumNoiseBins)); |
| 332 float* clear_cursor = clear_data_.data(); | 339 float* clear_cursor = clear_data_.data(); |
| 333 float* noise_cursor = noise_data_.data(); | |
| 334 for (int i = 0; i < kSamples; i += kFragmentSize) { | 340 for (int i = 0; i < kSamples; i += kFragmentSize) { |
| 335 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); | 341 enh_->SetCaptureNoiseEstimate(noise_data_, 1); |
| 342 clear_buffer_.CopyFrom(&clear_cursor, stream_config_); |
| 343 enh_->ProcessRenderAudio(&clear_buffer_); |
| 344 clear_buffer_.CopyTo(stream_config_, &clear_cursor); |
| 336 clear_cursor += kFragmentSize; | 345 clear_cursor += kFragmentSize; |
| 337 noise_cursor += kFragmentSize; | |
| 338 } | 346 } |
| 339 for (int i = 0; i < kSamples; i++) { | 347 for (int i = initial_delay_; i < kSamples; i++) { |
| 340 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) { | 348 if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) > |
| 349 kMaxTestError) { |
| 341 return true; | 350 return true; |
| 342 } | 351 } |
| 343 } | 352 } |
| 344 return false; | 353 return false; |
| 345 } | 354 } |
| 346 | 355 |
| 347 std::unique_ptr<IntelligibilityEnhancer> enh_; | 356 std::unique_ptr<IntelligibilityEnhancer> enh_; |
| 357 // Render clean speech buffer. |
| 358 AudioBuffer clear_buffer_; |
| 359 StreamConfig stream_config_; |
| 348 std::vector<float> clear_data_; | 360 std::vector<float> clear_data_; |
| 349 std::vector<float> noise_data_; | 361 std::vector<float> noise_data_; |
| 350 std::vector<float> orig_data_; | 362 std::vector<float> orig_data_; |
| 363 size_t initial_delay_; |
| 351 }; | 364 }; |
| 352 | 365 |
| 353 // For each class of generated data, tests that render stream is updated when | 366 // For each class of generated data, tests that render stream is updated when |
| 354 // it should be. | 367 // it should be. |
| 355 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { | 368 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { |
| 369 initial_delay_ = enh_->render_mangler_->initial_delay(); |
| 356 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); | 370 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); |
| 357 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); | 371 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); |
| 358 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); | 372 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); |
| 359 EXPECT_FALSE(CheckUpdate()); | 373 EXPECT_FALSE(CheckUpdate()); |
| 360 std::generate(noise_data_.begin(), noise_data_.end(), float_rand); | 374 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); |
| 375 orig_data_ = clear_data_; |
| 361 EXPECT_FALSE(CheckUpdate()); | 376 EXPECT_FALSE(CheckUpdate()); |
| 362 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); | 377 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); |
| 363 orig_data_ = clear_data_; | 378 orig_data_ = clear_data_; |
| 379 std::generate(noise_data_.begin(), noise_data_.end(), float_rand); |
| 380 FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data()); |
| 364 EXPECT_TRUE(CheckUpdate()); | 381 EXPECT_TRUE(CheckUpdate()); |
| 365 } | 382 } |
| 366 | 383 |
| 367 // Tests ERB bank creation, comparing against matlab output. | 384 // Tests ERB bank creation, comparing against matlab output. |
| 368 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { | 385 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { |
| 369 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); | 386 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); |
| 370 for (size_t i = 0; i < enh_->bank_size_; ++i) { | 387 for (size_t i = 0; i < enh_->bank_size_; ++i) { |
| 371 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); | 388 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); |
| 372 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); | 389 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); |
| 373 for (size_t j = 0; j < enh_->freqs_; ++j) { | 390 for (size_t j = 0; j < enh_->freqs_; ++j) { |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 411 const float kTolerance = 0.007f; | 428 const float kTolerance = 0.007f; |
| 412 std::vector<float> noise(kNumNoiseBins); | 429 std::vector<float> noise(kNumNoiseBins); |
| 413 std::vector<float> noise_psd(kNumNoiseBins); | 430 std::vector<float> noise_psd(kNumNoiseBins); |
| 414 std::generate(noise.begin(), noise.end(), float_rand); | 431 std::generate(noise.begin(), noise.end(), float_rand); |
| 415 for (size_t i = 0; i < kNumNoiseBins; ++i) { | 432 for (size_t i = 0; i < kNumNoiseBins; ++i) { |
| 416 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; | 433 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; |
| 417 } | 434 } |
| 418 float* clear_cursor = clear_data_.data(); | 435 float* clear_cursor = clear_data_.data(); |
| 419 for (size_t i = 0; i < kNumFramesToProcess; ++i) { | 436 for (size_t i = 0; i < kNumFramesToProcess; ++i) { |
| 420 enh_->SetCaptureNoiseEstimate(noise, kGain); | 437 enh_->SetCaptureNoiseEstimate(noise, kGain); |
| 421 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); | 438 clear_buffer_.CopyFrom(&clear_cursor, stream_config_); |
| 439 enh_->ProcessRenderAudio(&clear_buffer_); |
| 422 } | 440 } |
| 423 const std::vector<float>& estimated_psd = | 441 const std::vector<float>& estimated_psd = |
| 424 enh_->noise_power_estimator_.power(); | 442 enh_->noise_power_estimator_.power(); |
| 425 for (size_t i = 0; i < kNumNoiseBins; ++i) { | 443 for (size_t i = 0; i < kNumNoiseBins; ++i) { |
| 426 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], | 444 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], |
| 427 kTolerance); | 445 kTolerance); |
| 428 } | 446 } |
| 429 } | 447 } |
| 430 | 448 |
| 449 TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) { |
| 450 const int kTestSampleRate = AudioProcessing::kSampleRate32kHz; |
| 451 const int kTestSplitRate = AudioProcessing::kSampleRate16kHz; |
| 452 const size_t kTestNumBands = |
| 453 rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate); |
| 454 const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100); |
| 455 const size_t kTestSplitFragmentSize = |
| 456 rtc::CheckedDivExact(kTestSplitRate, 100); |
| 457 enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels, |
| 458 kTestNumBands, kNumNoiseBins)); |
| 459 size_t initial_delay = enh_->render_mangler_->initial_delay(); |
| 460 std::vector<float> rand_gen_buf(kTestFragmentSize); |
| 461 AudioBuffer original_buffer(kTestFragmentSize, kNumChannels, |
| 462 kTestFragmentSize, kNumChannels, |
| 463 kTestFragmentSize); |
| 464 AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize, |
| 465 kNumChannels, kTestFragmentSize); |
| 466 for (size_t i = 0u; i < kTestNumBands; ++i) { |
| 467 std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand); |
| 468 original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), |
| 469 rand_gen_buf.size()); |
| 470 audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), |
| 471 rand_gen_buf.size()); |
| 472 } |
| 473 enh_->ProcessRenderAudio(&audio_buffer); |
| 474 for (size_t i = 0u; i < kTestNumBands; ++i) { |
| 475 const float* original_ptr = original_buffer.split_bands_const_f(0)[i]; |
| 476 const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i]; |
| 477 for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) { |
| 478 EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]), |
| 479 kMaxTestError); |
| 480 } |
| 481 } |
| 482 } |
| 483 |
| 431 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { | 484 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { |
| 432 const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f}; | 485 const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f}; |
| 433 | 486 |
| 434 RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); | 487 RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); |
| 435 } | 488 } |
| 436 | 489 |
| 437 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) { | 490 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) { |
| 438 const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f}; | 491 const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f}; |
| 439 | 492 |
| 440 RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); | 493 RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 474 } | 527 } |
| 475 | 528 |
| 476 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { | 529 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { |
| 477 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, | 530 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, |
| 478 -0.012975f, -0.015940f, -0.017820f}; | 531 -0.012975f, -0.015940f, -0.017820f}; |
| 479 | 532 |
| 480 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); | 533 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); |
| 481 } | 534 } |
| 482 | 535 |
| 483 } // namespace webrtc | 536 } // namespace webrtc |
| OLD | NEW |