OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
195 const float kTestNonZeroVarLambdaTop[] = { | 195 const float kTestNonZeroVarLambdaTop[] = { |
196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, | 196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, |
197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, | 197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, |
198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; | 198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; |
199 static_assert(arraysize(kTestCenterFreqs) == | 199 static_assert(arraysize(kTestCenterFreqs) == |
200 arraysize(kTestNonZeroVarLambdaTop), | 200 arraysize(kTestNonZeroVarLambdaTop), |
201 "Power test data badly initialized."); | 201 "Power test data badly initialized."); |
202 const float kMaxTestError = 0.005f; | 202 const float kMaxTestError = 0.005f; |
203 | 203 |
204 // Enhancer initialization parameters. | 204 // Enhancer initialization parameters. |
205 const int kSamples = 1000; | 205 const int kSamples = 10000; |
peah-webrtc
2016/09/13 13:30:00
What is the motivation behind the changed number o
aluebs-webrtc
2016/09/14 00:35:55
The number of samples before were not enough for the
| |
206 const int kSampleRate = 4000; | 206 const int kSampleRate = 4000; |
peah-webrtc
2016/09/13 13:30:00
What is the purpose of the samplerate constant of
aluebs-webrtc
2016/09/14 00:35:54
To set a sample rate for the tests.
peah-webrtc
2016/09/15 15:06:20
You mean that the test is running at a sample rate
aluebs-webrtc
2016/09/15 23:45:25
Other tests (like the bitexactness and the one I ju
peah-webrtc
2016/09/16 13:35:56
It makes sense to test for other sample rates. But
aluebs-webrtc
2016/09/17 00:48:48
Acknowledged.
| |
207 const int kNumChannels = 1; | 207 const int kNumChannels = 1; |
208 const int kFragmentSize = kSampleRate / 100; | 208 const int kFragmentSize = kSampleRate / 100; |
209 const size_t kNumNoiseBins = 129; | 209 const size_t kNumNoiseBins = 129; |
210 const size_t kNumBands = 1; | |
210 | 211 |
211 // Number of frames to process in the bitexactness tests. | 212 // Number of frames to process in the bitexactness tests. |
212 const size_t kNumFramesToProcess = 1000; | 213 const size_t kNumFramesToProcess = 1000; |
213 | 214 |
214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { | 215 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { |
215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz | 216 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz |
216 ? AudioProcessing::kSampleRate16kHz | 217 ? AudioProcessing::kSampleRate16kHz |
217 : sample_rate_hz); | 218 : sample_rate_hz); |
218 } | 219 } |
219 | 220 |
220 // Process one frame of data and produce the output. | 221 // Process one frame of data and produce the output. |
221 void ProcessOneFrame(int sample_rate_hz, | 222 void ProcessOneFrame(int sample_rate_hz, |
222 AudioBuffer* render_audio_buffer, | 223 AudioBuffer* render_audio_buffer, |
223 AudioBuffer* capture_audio_buffer, | 224 AudioBuffer* capture_audio_buffer, |
224 NoiseSuppressionImpl* noise_suppressor, | 225 NoiseSuppressionImpl* noise_suppressor, |
225 IntelligibilityEnhancer* intelligibility_enhancer) { | 226 IntelligibilityEnhancer* intelligibility_enhancer) { |
226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | 227 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
227 render_audio_buffer->SplitIntoFrequencyBands(); | 228 render_audio_buffer->SplitIntoFrequencyBands(); |
228 capture_audio_buffer->SplitIntoFrequencyBands(); | 229 capture_audio_buffer->SplitIntoFrequencyBands(); |
229 } | 230 } |
230 | 231 |
231 intelligibility_enhancer->ProcessRenderAudio( | 232 intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer); |
232 render_audio_buffer->split_channels_f(kBand0To8kHz), | |
233 IntelligibilityEnhancerSampleRate(sample_rate_hz), | |
234 render_audio_buffer->num_channels()); | |
235 | 233 |
236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); | 234 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); |
237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); | 235 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); |
238 | 236 |
239 intelligibility_enhancer->SetCaptureNoiseEstimate( | 237 intelligibility_enhancer->SetCaptureNoiseEstimate( |
240 noise_suppressor->NoiseEstimate(), 0); | 238 noise_suppressor->NoiseEstimate(), 0); |
241 | 239 |
242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | 240 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
243 render_audio_buffer->MergeFrequencyBands(); | 241 render_audio_buffer->MergeFrequencyBands(); |
244 } | 242 } |
(...skipping 24 matching lines...) Expand all Loading... | |
269 std::vector<float> capture_input(render_buffer.num_frames() * | 267 std::vector<float> capture_input(render_buffer.num_frames() * |
270 capture_buffer.num_channels()); | 268 capture_buffer.num_channels()); |
271 | 269 |
272 rtc::CriticalSection crit_capture; | 270 rtc::CriticalSection crit_capture; |
273 NoiseSuppressionImpl noise_suppressor(&crit_capture); | 271 NoiseSuppressionImpl noise_suppressor(&crit_capture); |
274 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); | 272 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); |
275 noise_suppressor.Enable(true); | 273 noise_suppressor.Enable(true); |
276 | 274 |
277 IntelligibilityEnhancer intelligibility_enhancer( | 275 IntelligibilityEnhancer intelligibility_enhancer( |
278 IntelligibilityEnhancerSampleRate(sample_rate_hz), | 276 IntelligibilityEnhancerSampleRate(sample_rate_hz), |
279 render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins()); | 277 render_config.num_channels(), kNumBands, |
278 NoiseSuppressionImpl::num_noise_bins()); | |
280 | 279 |
281 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { | 280 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { |
282 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), | 281 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), |
283 render_buffer.num_channels(), &render_file, | 282 render_buffer.num_channels(), &render_file, |
284 render_input); | 283 render_input); |
285 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), | 284 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), |
286 capture_buffer.num_channels(), &capture_file, | 285 capture_buffer.num_channels(), &capture_file, |
287 capture_input); | 286 capture_input); |
288 | 287 |
289 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); | 288 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); |
(...skipping 23 matching lines...) Expand all Loading... | |
313 | 312 |
314 float float_rand() { | 313 float float_rand() { |
315 return std::rand() * 2.f / RAND_MAX - 1; | 314 return std::rand() * 2.f / RAND_MAX - 1; |
316 } | 315 } |
317 | 316 |
318 } // namespace | 317 } // namespace |
319 | 318 |
320 class IntelligibilityEnhancerTest : public ::testing::Test { | 319 class IntelligibilityEnhancerTest : public ::testing::Test { |
321 protected: | 320 protected: |
322 IntelligibilityEnhancerTest() | 321 IntelligibilityEnhancerTest() |
323 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { | 322 : clear_buffer_(kFragmentSize, |
323 kNumChannels, | |
324 kFragmentSize, | |
325 kNumChannels, | |
326 kFragmentSize), | |
327 stream_config_(kSampleRate, kNumChannels), | |
328 clear_data_(kSamples), | |
329 noise_data_(kNumNoiseBins), | |
330 orig_data_(kSamples) { | |
324 std::srand(1); | 331 std::srand(1); |
325 enh_.reset( | 332 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, |
326 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); | 333 kNumNoiseBins)); |
327 } | 334 } |
328 | 335 |
329 bool CheckUpdate() { | 336 bool CheckUpdate() { |
peah-webrtc
2016/09/13 13:30:00
I'd like a more descriptive name here. This method
aluebs-webrtc
2016/09/14 00:35:54
I think that it is a great idea, but I don't think
peah-webrtc
2016/09/15 15:06:20
Acknowledged.
| |
330 enh_.reset( | 337 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, |
331 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); | 338 kNumNoiseBins)); |
332 float* clear_cursor = clear_data_.data(); | 339 float* clear_cursor = clear_data_.data(); |
peah-webrtc
2016/09/13 13:30:00
The name cursor I think is quite misleading as I c
aluebs-webrtc
2016/09/14 00:35:54
See above about naming.
| |
333 float* noise_cursor = noise_data_.data(); | |
334 for (int i = 0; i < kSamples; i += kFragmentSize) { | 340 for (int i = 0; i < kSamples; i += kFragmentSize) { |
335 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); | 341 enh_->SetCaptureNoiseEstimate(noise_data_, 1); |
342 clear_buffer_.CopyFrom(&clear_cursor, stream_config_); | |
343 enh_->ProcessRenderAudio(&clear_buffer_); | |
344 clear_buffer_.CopyTo(stream_config_, &clear_cursor); | |
336 clear_cursor += kFragmentSize; | 345 clear_cursor += kFragmentSize; |
337 noise_cursor += kFragmentSize; | |
338 } | 346 } |
339 for (int i = 0; i < kSamples; i++) { | 347 for (int i = initial_delay_; i < kSamples; i++) { |
340 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) { | 348 if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) > |
peah-webrtc
2016/09/13 13:30:00
As far as I can see, this does not verify that the
aluebs-webrtc
2016/09/14 00:35:54
Added test.
| |
349 kMaxTestError) { | |
341 return true; | 350 return true; |
342 } | 351 } |
343 } | 352 } |
344 return false; | 353 return false; |
345 } | 354 } |
346 | 355 |
347 std::unique_ptr<IntelligibilityEnhancer> enh_; | 356 std::unique_ptr<IntelligibilityEnhancer> enh_; |
357 AudioBuffer clear_buffer_; | |
peah-webrtc
2016/09/13 13:30:00
What is the reason for this name? Please explain,
aluebs-webrtc
2016/09/14 00:35:54
To be consistent with the naming in the test I am
| |
358 StreamConfig stream_config_; | |
348 std::vector<float> clear_data_; | 359 std::vector<float> clear_data_; |
349 std::vector<float> noise_data_; | 360 std::vector<float> noise_data_; |
350 std::vector<float> orig_data_; | 361 std::vector<float> orig_data_; |
362 size_t initial_delay_; | |
351 }; | 363 }; |
352 | 364 |
353 // For each class of generated data, tests that render stream is updated when | 365 // For each class of generated data, tests that render stream is updated when |
354 // it should be. | 366 // it should be. |
355 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { | 367 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { |
368 initial_delay_ = enh_->render_mangler_->initial_delay(); | |
356 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); | 369 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); |
357 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); | 370 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); |
358 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); | 371 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); |
359 EXPECT_FALSE(CheckUpdate()); | 372 EXPECT_FALSE(CheckUpdate()); |
360 std::generate(noise_data_.begin(), noise_data_.end(), float_rand); | 373 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); |
374 orig_data_ = clear_data_; | |
361 EXPECT_FALSE(CheckUpdate()); | 375 EXPECT_FALSE(CheckUpdate()); |
362 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); | 376 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); |
363 orig_data_ = clear_data_; | 377 orig_data_ = clear_data_; |
378 std::generate(noise_data_.begin(), noise_data_.end(), float_rand); | |
379 FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data()); | |
364 EXPECT_TRUE(CheckUpdate()); | 380 EXPECT_TRUE(CheckUpdate()); |
365 } | 381 } |
366 | 382 |
367 // Tests ERB bank creation, comparing against matlab output. | 383 // Tests ERB bank creation, comparing against matlab output. |
368 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { | 384 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { |
369 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); | 385 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); |
370 for (size_t i = 0; i < enh_->bank_size_; ++i) { | 386 for (size_t i = 0; i < enh_->bank_size_; ++i) { |
371 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); | 387 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); |
372 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); | 388 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); |
373 for (size_t j = 0; j < enh_->freqs_; ++j) { | 389 for (size_t j = 0; j < enh_->freqs_; ++j) { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
411 const float kTolerance = 0.007f; | 427 const float kTolerance = 0.007f; |
412 std::vector<float> noise(kNumNoiseBins); | 428 std::vector<float> noise(kNumNoiseBins); |
413 std::vector<float> noise_psd(kNumNoiseBins); | 429 std::vector<float> noise_psd(kNumNoiseBins); |
414 std::generate(noise.begin(), noise.end(), float_rand); | 430 std::generate(noise.begin(), noise.end(), float_rand); |
415 for (size_t i = 0; i < kNumNoiseBins; ++i) { | 431 for (size_t i = 0; i < kNumNoiseBins; ++i) { |
416 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; | 432 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; |
417 } | 433 } |
418 float* clear_cursor = clear_data_.data(); | 434 float* clear_cursor = clear_data_.data(); |
419 for (size_t i = 0; i < kNumFramesToProcess; ++i) { | 435 for (size_t i = 0; i < kNumFramesToProcess; ++i) { |
420 enh_->SetCaptureNoiseEstimate(noise, kGain); | 436 enh_->SetCaptureNoiseEstimate(noise, kGain); |
421 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); | 437 clear_buffer_.CopyFrom(&clear_cursor, stream_config_); |
peah-webrtc
2016/09/13 13:30:00
Why don't you update the clear cursor counter here
aluebs-webrtc
2016/09/14 00:35:54
Because it is irrelevant to the Noise PSD estimati
peah-webrtc
2016/09/15 15:06:20
I see, so basically you don't really care about wh
aluebs-webrtc
2016/09/15 23:45:25
Agreed. But it is unrelated to this CL, so we shou
| |
438 enh_->ProcessRenderAudio(&clear_buffer_); | |
422 } | 439 } |
423 const std::vector<float>& estimated_psd = | 440 const std::vector<float>& estimated_psd = |
424 enh_->noise_power_estimator_.power(); | 441 enh_->noise_power_estimator_.power(); |
425 for (size_t i = 0; i < kNumNoiseBins; ++i) { | 442 for (size_t i = 0; i < kNumNoiseBins; ++i) { |
426 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], | 443 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], |
427 kTolerance); | 444 kTolerance); |
428 } | 445 } |
429 } | 446 } |
430 | 447 |
431 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { | 448 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
474 } | 491 } |
475 | 492 |
476 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { | 493 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { |
477 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, | 494 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, |
478 -0.012975f, -0.015940f, -0.017820f}; | 495 -0.012975f, -0.015940f, -0.017820f}; |
479 | 496 |
480 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); | 497 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); |
481 } | 498 } |
482 | 499 |
483 } // namespace webrtc | 500 } // namespace webrtc |
OLD | NEW |