OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <math.h> | 11 #include <math.h> |
12 #include <stdlib.h> | 12 #include <stdlib.h> |
13 | 13 |
14 #include <algorithm> | 14 #include <algorithm> |
15 #include <memory> | 15 #include <memory> |
16 #include <vector> | 16 #include <vector> |
17 | 17 |
18 #include "testing/gtest/include/gtest/gtest.h" | 18 #include "testing/gtest/include/gtest/gtest.h" |
| 19 #include "webrtc/base/array_view.h" |
19 #include "webrtc/base/arraysize.h" | 20 #include "webrtc/base/arraysize.h" |
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" | 21 #include "webrtc/common_audio/signal_processing/include/signal_processing_librar
y.h" |
| 22 #include "webrtc/modules/audio_processing/audio_buffer.h" |
21 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhanc
er.h" | 23 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhanc
er.h" |
| 24 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" |
| 25 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" |
| 26 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" |
22 | 27 |
23 namespace webrtc { | 28 namespace webrtc { |
24 | 29 |
25 namespace { | 30 namespace { |
26 | 31 |
27 // Target output for ERB create test. Generated with matlab. | 32 // Target output for ERB create test. Generated with matlab. |
28 const float kTestCenterFreqs[] = { | 33 const float kTestCenterFreqs[] = { |
29 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f, | 34 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f, |
30 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f, | 35 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f, |
31 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f, | 36 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f, |
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
196 "Power test data badly initialized."); | 201 "Power test data badly initialized."); |
// Largest absolute deviation accepted by the EXPECT_NEAR checks in this file.
const float kMaxTestError = 0.005f;

// Enhancer initialization parameters.
const int kSampleRate = 4000;
const int kNumChannels = 1;
const int kSamples = 1000;
// One hundredth of kSampleRate.
const int kFragmentSize = kSampleRate / 100;
const size_t kNumNoiseBins = 129;

// Number of frames to process in the bitexactness tests.
const size_t kNumFramesToProcess = 1000;
| 213 |
| 214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { |
| 215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz |
| 216 ? AudioProcessing::kSampleRate16kHz |
| 217 : sample_rate_hz); |
| 218 } |
| 219 |
| 220 // Process one frame of data and produce the output. |
| 221 void ProcessOneFrame(int sample_rate_hz, |
| 222 AudioBuffer* render_audio_buffer, |
| 223 AudioBuffer* capture_audio_buffer, |
| 224 NoiseSuppressionImpl* noise_suppressor, |
| 225 IntelligibilityEnhancer* intelligibility_enhancer) { |
| 226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 227 render_audio_buffer->SplitIntoFrequencyBands(); |
| 228 capture_audio_buffer->SplitIntoFrequencyBands(); |
| 229 } |
| 230 |
| 231 intelligibility_enhancer->ProcessRenderAudio( |
| 232 render_audio_buffer->split_channels_f(kBand0To8kHz), |
| 233 IntelligibilityEnhancerSampleRate(sample_rate_hz), |
| 234 render_audio_buffer->num_channels()); |
| 235 |
| 236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); |
| 237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); |
| 238 |
| 239 intelligibility_enhancer->SetCaptureNoiseEstimate( |
| 240 noise_suppressor->NoiseEstimate()); |
| 241 |
| 242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 243 render_audio_buffer->MergeFrequencyBands(); |
| 244 } |
| 245 } |
| 246 |
| 247 // Processes a specified amount of frames, verifies the results and reports |
| 248 // any errors. |
| 249 void RunBitexactnessTest(int sample_rate_hz, |
| 250 size_t num_channels, |
| 251 rtc::ArrayView<const float> output_reference) { |
| 252 const StreamConfig render_config(sample_rate_hz, num_channels, false); |
| 253 AudioBuffer render_buffer( |
| 254 render_config.num_frames(), render_config.num_channels(), |
| 255 render_config.num_frames(), render_config.num_channels(), |
| 256 render_config.num_frames()); |
| 257 test::InputAudioFile render_file( |
| 258 test::GetApmRenderTestVectorFileName(sample_rate_hz)); |
| 259 std::vector<float> render_input(render_buffer.num_frames() * |
| 260 render_buffer.num_channels()); |
| 261 |
| 262 const StreamConfig capture_config(sample_rate_hz, num_channels, false); |
| 263 AudioBuffer capture_buffer( |
| 264 capture_config.num_frames(), capture_config.num_channels(), |
| 265 capture_config.num_frames(), capture_config.num_channels(), |
| 266 capture_config.num_frames()); |
| 267 test::InputAudioFile capture_file( |
| 268 test::GetApmCaptureTestVectorFileName(sample_rate_hz)); |
| 269 std::vector<float> capture_input(render_buffer.num_frames() * |
| 270 capture_buffer.num_channels()); |
| 271 |
| 272 rtc::CriticalSection crit_capture; |
| 273 NoiseSuppressionImpl noise_suppressor(&crit_capture); |
| 274 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); |
| 275 noise_suppressor.Enable(true); |
| 276 |
| 277 IntelligibilityEnhancer intelligibility_enhancer( |
| 278 IntelligibilityEnhancerSampleRate(sample_rate_hz), |
| 279 render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins()); |
| 280 |
| 281 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { |
| 282 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), |
| 283 render_buffer.num_channels(), &render_file, |
| 284 render_input); |
| 285 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), |
| 286 capture_buffer.num_channels(), &capture_file, |
| 287 capture_input); |
| 288 |
| 289 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); |
| 290 test::CopyVectorToAudioBuffer(capture_config, capture_input, |
| 291 &capture_buffer); |
| 292 |
| 293 ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer, |
| 294 &noise_suppressor, &intelligibility_enhancer); |
| 295 } |
| 296 |
| 297 // Extract and verify the test results. |
| 298 std::vector<float> render_output; |
| 299 test::ExtractVectorFromAudioBuffer(render_config, &render_buffer, |
| 300 &render_output); |
| 301 |
| 302 const float kTolerance = 1.f / static_cast<float>(1 << 15); |
| 303 |
| 304 // Compare the output with the reference. Only the first values of the output |
| 305 // from last frame processed are compared in order not having to specify all |
| 306 // preceeding frames as testvectors. As the algorithm being tested has a |
| 307 // memory, testing only the last frame implicitly also tests the preceeding |
| 308 // frames. |
| 309 EXPECT_TRUE(test::BitExactFrame(render_buffer.num_frames(), |
| 310 render_config.num_channels(), |
| 311 output_reference, render_output, kTolerance)); |
| 312 } |
| 313 |
206 } // namespace | 314 } // namespace |
207 | 315 |
208 class IntelligibilityEnhancerTest : public ::testing::Test { | 316 class IntelligibilityEnhancerTest : public ::testing::Test { |
209 protected: | 317 protected: |
210 IntelligibilityEnhancerTest() | 318 IntelligibilityEnhancerTest() |
211 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { | 319 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { |
212 enh_.reset( | 320 enh_.reset( |
213 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); | 321 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); |
214 } | 322 } |
215 | 323 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
288 for (size_t i = 0; i < enh_->bank_size_; i++) { | 396 for (size_t i = 0; i < enh_->bank_size_; i++) { |
289 EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); | 397 EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); |
290 } | 398 } |
291 lambda = -1.f; | 399 lambda = -1.f; |
292 enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); | 400 enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); |
293 for (size_t i = 0; i < enh_->bank_size_; i++) { | 401 for (size_t i = 0; i < enh_->bank_size_; i++) { |
294 EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); | 402 EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); |
295 } | 403 } |
296 } | 404 } |
297 | 405 |
| 406 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { |
| 407 const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f}; |
| 408 |
| 409 RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); |
| 410 } |
| 411 |
| 412 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) { |
| 413 const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f}; |
| 414 |
| 415 RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); |
| 416 } |
| 417 |
| 418 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) { |
| 419 const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f}; |
| 420 |
| 421 RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference); |
| 422 } |
| 423 |
| 424 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) { |
| 425 const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f}; |
| 426 |
| 427 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference); |
| 428 } |
| 429 |
| 430 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) { |
| 431 const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f, |
| 432 -0.000641f, 0.000366f, 0.000641f}; |
| 433 |
| 434 RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference); |
| 435 } |
| 436 |
| 437 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) { |
| 438 const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f, |
| 439 -0.001404f, -0.001465f, 0.000549f}; |
| 440 |
| 441 RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference); |
| 442 } |
| 443 |
| 444 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) { |
| 445 const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f, |
| 446 -0.001343f, -0.004578f, 0.000977f}; |
| 447 |
| 448 RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference); |
| 449 } |
| 450 |
| 451 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { |
| 452 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, |
| 453 -0.012975f, -0.015940f, -0.017820f}; |
| 454 |
| 455 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); |
| 456 } |
| 457 |
298 } // namespace webrtc | 458 } // namespace webrtc |
OLD | NEW |