OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 #include <vector> | |
11 | |
12 #include "testing/gtest/include/gtest/gtest.h" | |
13 #include "webrtc/base/array_view.h" | |
14 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
15 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | |
16 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" | |
17 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" | |
18 | |
19 namespace webrtc { | |
20 namespace { | |
21 | |
// Number of frames fed through the noise suppressor by each test before the
// results are compared against the references. Declared as size_t so that it
// has the same type as the size_t frame counter it bounds.
const size_t kNumFramesToProcess = 1000;
23 | |
24 // Process one frame of data and produce the output. | |
25 void ProcessOneFrame(int sample_rate_hz, | |
26 AudioBuffer* capture_buffer, | |
27 NoiseSuppressionImpl* noise_suppressor) { | |
28 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
29 capture_buffer->SplitIntoFrequencyBands(); | |
30 } | |
31 | |
32 noise_suppressor->AnalyzeCaptureAudio(capture_buffer); | |
33 noise_suppressor->ProcessCaptureAudio(capture_buffer); | |
34 | |
35 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
36 capture_buffer->MergeFrequencyBands(); | |
37 } | |
38 } | |
39 | |
40 // Processes a specified amount of frames, verifies the results and reports | |
41 // any errors. | |
42 void RunBitexactnessTest(int sample_rate_hz, | |
43 size_t num_channels, | |
44 NoiseSuppressionImpl::Level level, | |
45 float speech_probability_reference, | |
46 rtc::ArrayView<const float> noise_estimate_reference, | |
47 rtc::ArrayView<const float> output_reference) { | |
48 rtc::CriticalSection crit_capture; | |
49 NoiseSuppressionImpl noise_suppressor(&crit_capture); | |
50 noise_suppressor.Initialize(num_channels, sample_rate_hz); | |
51 noise_suppressor.Enable(true); | |
52 noise_suppressor.set_level(level); | |
53 | |
54 int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); | |
55 const StreamConfig capture_config(sample_rate_hz, num_channels, false); | |
56 AudioBuffer capture_buffer( | |
57 capture_config.num_frames(), capture_config.num_channels(), | |
58 capture_config.num_frames(), capture_config.num_channels(), | |
59 capture_config.num_frames()); | |
60 test::InputAudioFile capture_file( | |
61 test::GetApmCaptureTestVectorFileName(sample_rate_hz)); | |
62 std::vector<float> capture_input(samples_per_channel * num_channels); | |
63 for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { | |
64 ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, | |
65 &capture_file, capture_input); | |
66 | |
67 test::CopyVectorToAudioBuffer(capture_config, capture_input, | |
68 &capture_buffer); | |
69 | |
70 ProcessOneFrame(sample_rate_hz, &capture_buffer, &noise_suppressor); | |
71 } | |
72 | |
73 // Extract test results. | |
74 std::vector<float> capture_output; | |
75 test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, | |
76 &capture_output); | |
77 float speech_probability = noise_suppressor.speech_probability(); | |
78 std::vector<float> noise_estimate = noise_suppressor.NoiseEstimate(); | |
79 | |
80 const float kTolerance = 1.0f / 32768.0f; | |
81 EXPECT_FLOAT_EQ(speech_probability_reference, speech_probability); | |
82 EXPECT_TRUE(test::BitExactVector(noise_estimate_reference, noise_estimate, | |
83 kTolerance)); | |
84 | |
85 // Compare the output with the reference. Only the first values of the output | |
86 // from last frame processed are compared in order not having to specify all | |
87 // preceeding frames as testvectors. As the algorithm being tested has a | |
88 // memory, testing only the last frame implicitly also tests the preceeding | |
89 // frames. | |
90 EXPECT_TRUE(test::BitExactFrame( | |
91 capture_config.num_frames(), capture_config.num_channels(), | |
92 output_reference, capture_output, kTolerance)); | |
93 } | |
94 | |
95 } // namespace | |
96 | |
97 TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) { | |
98 #if defined(WEBRTC_ARCH_ARM64) | |
99 const float kSpeechProbabilityReference = -4.0f; | |
100 const float kNoiseEstimateReference[] = {2.797542f, 6.488125f, 14.995160f}; | |
101 const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; | |
102 #elif defined(WEBRTC_ARCH_ARM) | |
103 const float kSpeechProbabilityReference = -4.0f; | |
104 const float kNoiseEstimateReference[] = {2.797542f, 6.488125f, 14.995160f}; | |
105 const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; | |
106 #else | |
107 const float kSpeechProbabilityReference = 0.73421317f; | |
108 const float kNoiseEstimateReference[] = {0.035866f, 0.100382f, 0.229889f}; | |
109 const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f}; | |
110 #endif | |
111 | |
112 RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow, | |
113 kSpeechProbabilityReference, kNoiseEstimateReference, | |
114 kOutputReference); | |
115 } | |
116 | |
117 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) { | |
118 #if defined(WEBRTC_ARCH_ARM64) | |
119 const float kSpeechProbabilityReference = -4.0f; | |
120 const float kNoiseEstimateReference[] = {2.475060f, 6.130507f, 14.030761f}; | |
121 const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; | |
122 #elif defined(WEBRTC_ARCH_ARM) | |
123 const float kSpeechProbabilityReference = -4.0f; | |
124 const float kNoiseEstimateReference[] = {2.475060f, 6.130507f, 14.030761f}; | |
125 const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; | |
126 #else | |
127 const float kSpeechProbabilityReference = 0.71672988f; | |
128 const float kNoiseEstimateReference[] = {0.065653f, 0.198662f, 0.477870f}; | |
129 const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f}; | |
130 #endif | |
131 | |
132 RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kLow, | |
133 kSpeechProbabilityReference, kNoiseEstimateReference, | |
134 kOutputReference); | |
135 } | |
136 | |
137 TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) { | |
138 #if defined(WEBRTC_ARCH_ARM64) | |
139 const float kSpeechProbabilityReference = -4.0f; | |
140 const float kNoiseEstimateReference[] = {2.480526f, 6.169749f, 14.102388f}; | |
141 const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; | |
142 #elif defined(WEBRTC_ARCH_ARM) | |
143 const float kSpeechProbabilityReference = -4.0f; | |
144 const float kNoiseEstimateReference[] = {2.480526f, 6.169749f, 14.102388f}; | |
145 const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; | |
146 #else | |
147 const float kSpeechProbabilityReference = 0.67999554f; | |
148 const float kNoiseEstimateReference[] = {0.065606f, 0.215971f, 0.455931f}; | |
149 const float kOutputReference[] = {0.001221f, 0.001984f, 0.002228f}; | |
150 #endif | |
151 | |
152 RunBitexactnessTest(32000, 1, NoiseSuppression::Level::kLow, | |
153 kSpeechProbabilityReference, kNoiseEstimateReference, | |
154 kOutputReference); | |
155 } | |
156 | |
157 TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) { | |
158 #if defined(WEBRTC_ARCH_ARM64) | |
159 const float kSpeechProbabilityReference = -4.0f; | |
160 const float kNoiseEstimateReference[] = {2.504498f, 6.068024f, 13.058871f}; | |
161 const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; | |
162 #elif defined(WEBRTC_ARCH_ARM) | |
163 const float kSpeechProbabilityReference = -4.0f; | |
164 const float kNoiseEstimateReference[] = {2.504498f, 6.068024f, 13.058871f}; | |
165 const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; | |
166 #else | |
167 const float kSpeechProbabilityReference = 0.70645678f; | |
168 const float kNoiseEstimateReference[] = {0.066186f, 0.210660f, 0.402548f}; | |
169 const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f}; | |
170 #endif | |
171 | |
172 RunBitexactnessTest(48000, 1, NoiseSuppression::Level::kLow, | |
173 kSpeechProbabilityReference, kNoiseEstimateReference, | |
174 kOutputReference); | |
175 } | |
176 | |
177 TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) { | |
178 #if defined(WEBRTC_ARCH_ARM64) | |
179 const float kSpeechProbabilityReference = -4.0f; | |
180 const float kNoiseEstimateReference[] = {9.757937f, 12.392158f, 11.317673f}; | |
181 const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, | |
182 -0.002441f, 0.000855f, -0.003204f}; | |
183 #elif defined(WEBRTC_ARCH_ARM) | |
184 const float kSpeechProbabilityReference = -4.0f; | |
185 const float kNoiseEstimateReference[] = {10.079447f, 11.849465f, 10.667051f}; | |
186 const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, | |
187 -0.002472f, 0.000916f, -0.003235f}; | |
188 #else | |
189 const float kSpeechProbabilityReference = 0.67230678f; | |
190 const float kNoiseEstimateReference[] = {0.298195f, 0.345745f, 0.320528f}; | |
191 const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f, | |
192 -0.002399f, 0.001018f, -0.003189f}; | |
193 #endif | |
194 | |
195 RunBitexactnessTest(16000, 2, NoiseSuppression::Level::kLow, | |
196 kSpeechProbabilityReference, kNoiseEstimateReference, | |
197 kOutputReference); | |
198 } | |
199 | |
200 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) { | |
201 #if defined(WEBRTC_ARCH_ARM64) | |
202 const float kSpeechProbabilityReference = -4.0f; | |
203 const float kNoiseEstimateReference[] = {1.004436f, 3.711453f, 9.602631f}; | |
204 const float kOutputReference[] = {0.004669f, 0.005524f, 0.005432f}; | |
205 #elif defined(WEBRTC_ARCH_ARM) | |
206 const float kSpeechProbabilityReference = -4.0f; | |
207 const float kNoiseEstimateReference[] = {1.095946f, 3.351643f, 8.167248f}; | |
208 const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f}; | |
209 #else | |
210 const float kSpeechProbabilityReference = 0.70897013f; | |
211 const float kNoiseEstimateReference[] = {0.066269f, 0.199999f, 0.476885f}; | |
212 const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f}; | |
213 #endif | |
214 | |
215 RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kModerate, | |
216 kSpeechProbabilityReference, kNoiseEstimateReference, | |
217 kOutputReference); | |
218 } | |
219 | |
220 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) { | |
221 #if defined(WEBRTC_ARCH_ARM64) | |
222 const float kSpeechProbabilityReference = -4.0f; | |
223 const float kNoiseEstimateReference[] = {1.023022f, 3.759059f, 9.614030f}; | |
224 const float kOutputReference[] = {0.004639f, 0.005402f, 0.005310f}; | |
225 #elif defined(WEBRTC_ARCH_ARM) | |
226 const float kSpeechProbabilityReference = -4.0f; | |
227 const float kNoiseEstimateReference[] = {1.114510f, 3.410356f, 8.262188f}; | |
228 const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f}; | |
229 #else | |
230 const float kSpeechProbabilityReference = 0.70106733f; | |
231 const float kNoiseEstimateReference[] = {0.067901f, 0.204835f, 0.481723f}; | |
232 const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f}; | |
233 #endif | |
234 | |
235 RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kHigh, | |
236 kSpeechProbabilityReference, kNoiseEstimateReference, | |
237 kOutputReference); | |
238 } | |
239 | |
240 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) { | |
241 #if defined(WEBRTC_ARCH_ARM64) | |
242 const float kSpeechProbabilityReference = -4.0f; | |
243 const float kNoiseEstimateReference[] = {2.614974f, 6.041980f, 14.029047f}; | |
244 const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; | |
245 #elif defined(WEBRTC_ARCH_ARM) | |
246 const float kSpeechProbabilityReference = -4.0f; | |
247 const float kNoiseEstimateReference[] = {2.614974f, 6.041980f, 14.029047f}; | |
248 const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; | |
249 #else | |
250 const float kSpeechProbabilityReference = 0.70281971f; | |
251 const float kNoiseEstimateReference[] = {0.068797f, 0.205191f, 0.481312f}; | |
252 const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f}; | |
253 #endif | |
254 | |
255 RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kVeryHigh, | |
256 kSpeechProbabilityReference, kNoiseEstimateReference, | |
257 kOutputReference); | |
258 } | |
259 | |
260 } // namespace webrtc | |
OLD | NEW |