Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
|
hlundin-webrtc
2016/03/16 12:44:28
2016
peah-webrtc
2016/03/17 13:15:00
Great find!
Done.
| |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "testing/gtest/include/gtest/gtest.h" | |
| 13 #include "webrtc/base/array_view.h" | |
| 14 #include "webrtc/base/random.h" | |
| 15 #include "webrtc/modules/audio_processing/audio_buffer.h" | |
| 16 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | |
| 17 #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" | |
| 18 #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" | |
| 19 | |
| 20 namespace webrtc { | |
| 21 namespace { | |
| 22 | |
| 23 // Process one frame of data and produce the output. | |
| 24 void ProcessOneFrame(int sample_rate_hz, | |
| 25 AudioBuffer* audio_buffer, | |
| 26 NoiseSuppressionImpl* noise_suppressor, | |
| 27 std::vector<float>* frame_output, | |
| 28 float* speech_probability, | |
| 29 std::vector<float>* noise_estimate) { | |
| 30 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
| 31 audio_buffer->SplitIntoFrequencyBands(); | |
| 32 } | |
| 33 | |
| 34 noise_suppressor->AnalyzeCaptureAudio(audio_buffer); | |
| 35 noise_suppressor->ProcessCaptureAudio(audio_buffer); | |
| 36 *speech_probability = noise_suppressor->speech_probability(); | |
| 37 *noise_estimate = noise_suppressor->NoiseEstimate(); | |
| 38 | |
| 39 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { | |
| 40 audio_buffer->MergeFrequencyBands(); | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 // Forms a predefined random test vector- | |
|
hlundin-webrtc
2016/03/16 12:44:27
End with '.', not '-'.
hlundin-webrtc
2016/03/16 12:44:28
Please, tell me what the range of the samples is.
peah-webrtc
2016/03/17 13:15:00
Done.
peah-webrtc
2016/03/17 13:15:00
Good point! This part of the code is now removed.
| |
| 45 void ConstructTestVector(int samples_per_channel, | |
|
hlundin-webrtc
2016/03/16 12:44:28
Make all int parameters size_t.
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 46 int num_channels, | |
| 47 int frame_counter, | |
| 48 Random* rand_gen, | |
| 49 std::vector<float>* testvector) { | |
| 50 testvector->resize(samples_per_channel * num_channels); | |
| 51 | |
| 52 bool low_level = ((frame_counter / 10) > 5); | |
|
hlundin-webrtc
2016/03/16 12:44:28
Why not
bool low_level = (frame_counter >= 60);
?
peah-webrtc
2016/03/17 13:15:00
Good point! The code is now removed.
| |
| 53 float scale = (low_level ? 0.01f : 1.0f); | |
|
hlundin-webrtc
2016/03/16 12:44:28
... or even
const float scale = frame_counter >= 60 ? 0.01f : 1.0f;
peah-webrtc
2016/03/17 13:15:01
Another good point! The code is now removed.
| |
| 54 | |
| 55 for (auto& v : *testvector) { | |
| 56 v = scale * (2.0f * rand_gen->Rand<float>() - 1.0f); | |
| 57 } | |
| 58 } | |
| 59 | |
| 60 void SetupNoiseSuppressor(int sample_rate_hz, | |
| 61 int num_channels, | |
| 62 NoiseSuppressionImpl::Level level, | |
| 63 NoiseSuppressionImpl* noise_suppressor) { | |
| 64 noise_suppressor->Initialize(num_channels, sample_rate_hz); | |
| 65 noise_suppressor->Enable(true); | |
| 66 noise_suppressor->set_level(level); | |
| 67 } | |
| 68 | |
| 69 // Verifies the output of the test against a reference and reports the results | |
| 70 // using the gtest EXPECT_PRED_FORMAT2 functionality | |
| 71 void VerifyOutput(const StreamConfig& stream_config, | |
| 72 float speech_probability_reference, | |
| 73 const rtc::ArrayView<const float>& noise_estimate_reference, | |
| 74 const rtc::ArrayView<const float>& output_reference, | |
| 75 const std::vector<float>& output, | |
|
hlundin-webrtc
2016/03/16 12:44:27
Nit: the internal order of reference and test para…
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 76 float speech_probability, | |
| 77 const std::vector<float>& noise_estimate) { | |
| 78 // Form vectors to compare the reference to. Only the first values of the | |
| 79 // outputs are compared, to avoid having to specify all preceding frames | |
| 80 // as testvectors. | |
| 81 const size_t reference_frame_length = | |
| 82 output_reference.size() / stream_config.num_channels(); | |
|
hlundin-webrtc
2016/03/16 12:44:28
This should be an exact division, right? Consider …
peah-webrtc
2016/03/17 13:15:00
Good point! Thanks for the suggestion!
Done.
| |
| 83 std::vector<float> output_to_verify; | |
| 84 for (size_t channel_no = 0; channel_no < stream_config.num_channels(); | |
| 85 ++channel_no) { | |
| 86 output_to_verify.insert( | |
| 87 output_to_verify.end(), | |
| 88 output.begin() + channel_no * stream_config.num_frames(), | |
| 89 output.begin() + channel_no * stream_config.num_frames() + | |
| 90 reference_frame_length); | |
| 91 } | |
| 92 | |
| 93 EXPECT_PRED_FORMAT2(test::AssertVectorsNotEqual, output_to_verify, | |
| 94 output_reference); | |
| 95 EXPECT_PRED_FORMAT2(test::AssertVectorsNotEqual, noise_estimate, | |
| 96 noise_estimate_reference); | |
| 97 EXPECT_PRED_FORMAT2(test::AssertFloatsNotEqual, speech_probability, | |
|
hlundin-webrtc
2016/03/16 12:44:28
Did you consider using EXPECT_FLOAT_EQ or EXPECT_N…
peah-webrtc
2016/03/17 13:15:00
Great suggestion!
Done.
| |
| 98 speech_probability_reference); | |
| 99 } | |
| 100 | |
| 101 // Processes a specified amount of frames, verifies the results and reports | |
| 102 // any errors. | |
| 103 void RunBitexactnessTest( | |
| 104 int sample_rate_hz, | |
| 105 int num_channels, | |
|
hlundin-webrtc
2016/03/16 12:44:28
int -> size_t
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 106 int num_frames_to_process, | |
|
hlundin-webrtc
2016/03/16 12:44:28
int -> size_t
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 107 NoiseSuppressionImpl::Level level, | |
| 108 float speech_probability_reference, | |
| 109 const rtc::ArrayView<const float>& noise_estimate_reference, | |
| 110 const rtc::ArrayView<const float>& output_reference) { | |
| 111 Random rand_gen(42); | |
| 112 int samples_per_channel = 80 * sample_rate_hz / 8000; | |
|
hlundin-webrtc
2016/03/16 12:44:28
I think sample_rate_hz / 100 works just as well, w…
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 113 const StreamConfig stream_config(sample_rate_hz, num_channels, false); | |
| 114 AudioBuffer audio_buffer( | |
| 115 stream_config.num_frames(), stream_config.num_channels(), | |
| 116 stream_config.num_frames(), stream_config.num_channels(), | |
| 117 stream_config.num_frames()); | |
| 118 | |
| 119 rtc::CriticalSection crit; | |
| 120 NoiseSuppressionImpl noise_suppressor(&crit); | |
| 121 SetupNoiseSuppressor(sample_rate_hz, num_channels, level, &noise_suppressor); | |
| 122 | |
| 123 std::vector<float> output; | |
| 124 float speech_probability = 0.0f; | |
| 125 std::vector<float> noise_estimate; | |
| 126 std::vector<float> frame_input; | |
| 127 std::vector<float> frame_output; | |
| 128 for (int frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { | |
|
hlundin-webrtc
2016/03/16 12:44:28
size_t frame_no
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 129 ConstructTestVector(samples_per_channel, num_channels, frame_no, &rand_gen, | |
| 130 &frame_input); | |
| 131 | |
| 132 test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); | |
| 133 | |
| 134 ProcessOneFrame(sample_rate_hz, &audio_buffer, &noise_suppressor, &output, | |
| 135 &speech_probability, &noise_estimate); | |
| 136 | |
| 137 test::ExtractVectorFromAudioBuffer(stream_config, &audio_buffer, | |
| 138 &frame_output); | |
| 139 } | |
| 140 | |
| 141 // Compare the output to the reference. Only the first values of the output | |
|
hlundin-webrtc
2016/03/16 12:44:28
Compare ... with
hlundin-webrtc
2016/03/16 12:44:28
Wonky line-breaks in this paragraph.
peah-webrtc
2016/03/17 13:15:00
Done.
peah-webrtc
2016/03/17 13:15:01
Done.
| |
| 142 // from last frame processed | |
| 143 // is compared in order not having to specify all preceding frames as | |
|
hlundin-webrtc
2016/03/16 12:44:27
are compared
peah-webrtc
2016/03/17 13:15:00
Done.
| |
| 144 // testvectors. | |
| 145 // As the algorithm being tested has a memory, testing only | |
| 146 // the last frame implicitly also tests the preceding frames. | |
| 147 VerifyOutput(stream_config, speech_probability_reference, | |
| 148 noise_estimate_reference, output_reference, frame_output, | |
| 149 speech_probability, noise_estimate); | |
| 150 } | |
| 151 | |
| 152 } // namespace | |
| 153 | |
| 154 TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLowLevel) { | |
| 155 #if !defined(WEBRTC_ANDROID) | |
| 156 const float kOutputReference[] = {0.000921f, 0.003884f, -0.000689f}; | |
| 157 const float kNoiseEstimateReference[] = {0.028336f, 0.039530f, 0.042970f}; | |
| 158 const float kSpeechProbabilityReference = 0.122579f; | |
| 159 #else | |
| 160 const float kOutputReference[] = {0.000916f, 0.003876f, -0.000702f}; | |
| 161 const float kNoiseEstimateReference[] = {12.392536f, 13.370509f, 11.658783f}; | |
| 162 const float kSpeechProbabilityReference = -4.000000f; | |
| 163 #endif | |
| 164 | |
| 165 RunBitexactnessTest(8000, 1, 1000, NoiseSuppression::Level::kLow, | |
| 166 kSpeechProbabilityReference, | |
| 167 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 168 rtc::ArrayView<const float>(kOutputReference)); | |
| 169 } | |
| 170 | |
| 171 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLowLevel) { | |
| 172 #if !defined(WEBRTC_ANDROID) | |
| 173 const float kOutputReference[] = {0.002048f, 0.001845f, 0.003762f}; | |
| 174 const float kNoiseEstimateReference[] = {0.032741f, 0.052345f, 0.063557f}; | |
| 175 const float kSpeechProbabilityReference = 0.110951f; | |
| 176 #else | |
| 177 const float kOutputReference[] = {0.002014f, 0.001831f, 0.003754f}; | |
| 178 const float kNoiseEstimateReference[] = {7.268418f, 8.785124f, 8.383295f}; | |
| 179 const float kSpeechProbabilityReference = -4.000000f; | |
| 180 #endif | |
| 181 | |
| 182 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kLow, | |
| 183 kSpeechProbabilityReference, | |
| 184 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 185 rtc::ArrayView<const float>(kOutputReference)); | |
| 186 } | |
| 187 | |
| 188 TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLowLevel) { | |
| 189 #if !defined(WEBRTC_ANDROID) | |
| 190 const float kOutputReference[] = {-0.005249f, 0.001465f, -0.002533f}; | |
| 191 const float kNoiseEstimateReference[] = {0.025969f, 0.035012f, 0.035499f}; | |
| 192 const float kSpeechProbabilityReference = 0.139357f; | |
| 193 #else | |
| 194 const float kOutputReference[] = {-0.005219f, 0.001373f, -0.002472f}; | |
| 195 const float kNoiseEstimateReference[] = {12.616668f, 12.766106f, 11.475318f}; | |
| 196 const float kSpeechProbabilityReference = -4.000000f; | |
| 197 #endif | |
| 198 | |
| 199 RunBitexactnessTest(32000, 1, 1000, NoiseSuppression::Level::kLow, | |
| 200 kSpeechProbabilityReference, | |
| 201 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 202 rtc::ArrayView<const float>(kOutputReference)); | |
| 203 } | |
| 204 | |
| 205 TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLowLevel) { | |
| 206 #if !defined(WEBRTC_ANDROID) | |
| 207 const float kOutputReference[] = {0.001224f, 0.005314f, 0.002205f}; | |
| 208 const float kNoiseEstimateReference[] = {0.022175f, 0.031690f, 0.036631f}; | |
| 209 const float kSpeechProbabilityReference = 0.101083f; | |
| 210 #else | |
| 211 const float kOutputReference[] = {0.001181f, 0.005317f, 0.002217f}; | |
| 212 const float kNoiseEstimateReference[] = {7.594315f, 9.309500f, 9.561249f}; | |
| 213 const float kSpeechProbabilityReference = -4.000000f; | |
| 214 #endif | |
| 215 | |
| 216 RunBitexactnessTest(48000, 1, 1000, NoiseSuppression::Level::kLow, | |
| 217 kSpeechProbabilityReference, | |
| 218 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 219 rtc::ArrayView<const float>(kOutputReference)); | |
| 220 } | |
| 221 | |
| 222 TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLowLevel) { | |
| 223 #if !defined(WEBRTC_ANDROID) | |
| 224 const float kOutputReference[] = {0.000954f, 0.002081f, -0.001125f, | |
| 225 -0.003688f, 0.004999f, -0.004168f}; | |
| 226 const float kNoiseEstimateReference[] = {0.037820f, 0.054766f, 0.057829f}; | |
| 227 const float kSpeechProbabilityReference = 0.106168f; | |
| 228 #else | |
| 229 const float kOutputReference[] = {0.000946f, 0.002106f, -0.001099f, | |
| 230 -0.003693f, 0.004975f, -0.004181f}; | |
| 231 const float kNoiseEstimateReference[] = {9.096287f, 8.000648f, 8.823565f}; | |
| 232 const float kSpeechProbabilityReference = -4.000000f; | |
| 233 #endif | |
| 234 | |
| 235 RunBitexactnessTest(16000, 2, 1000, NoiseSuppression::Level::kLow, | |
| 236 kSpeechProbabilityReference, | |
| 237 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 238 rtc::ArrayView<const float>(kOutputReference)); | |
| 239 } | |
| 240 | |
| 241 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerateLevel) { | |
| 242 #if !defined(WEBRTC_ANDROID) | |
| 243 const float kOutputReference[] = {0.001036f, 0.000783f, 0.001862f}; | |
| 244 const float kNoiseEstimateReference[] = {0.032994f, 0.052608f, 0.065461f}; | |
| 245 const float kSpeechProbabilityReference = 0.110952f; | |
| 246 #else | |
| 247 const float kOutputReference[] = {0.000977f, 0.000763f, 0.001801f}; | |
| 248 const float kNoiseEstimateReference[] = {7.269972f, 8.785130f, 8.383298f}; | |
| 249 const float kSpeechProbabilityReference = -4.000000f; | |
| 250 #endif | |
| 251 | |
| 252 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kModerate, | |
| 253 kSpeechProbabilityReference, | |
| 254 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 255 rtc::ArrayView<const float>(kOutputReference)); | |
| 256 } | |
| 257 | |
| 258 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHighLevel) { | |
| 259 #if !defined(WEBRTC_ANDROID) | |
| 260 const float kOutputReference[] = {0.000533f, 0.000372f, 0.000948f}; | |
| 261 const float kNoiseEstimateReference[] = {0.037815f, 0.055497f, 0.065549f}; | |
| 262 const float kSpeechProbabilityReference = 0.110951f; | |
| 263 #else | |
| 264 const float kOutputReference[] = {0.000519f, 0.000336f, 0.000885f}; | |
| 265 const float kNoiseEstimateReference[] = {7.271456f, 8.785111f, 8.383295f}; | |
| 266 const float kSpeechProbabilityReference = -4.000000f; | |
| 267 #endif | |
| 268 | |
| 269 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kHigh, | |
| 270 kSpeechProbabilityReference, | |
| 271 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 272 rtc::ArrayView<const float>(kOutputReference)); | |
| 273 } | |
| 274 | |
| 275 TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHighLevel) { | |
| 276 #if !defined(WEBRTC_ANDROID) | |
| 277 const float kOutputReference[] = {0.000367f, 0.000259f, 0.000647f}; | |
| 278 const float kNoiseEstimateReference[] = {0.038476f, 0.055677f, 0.065570f}; | |
| 279 const float kSpeechProbabilityReference = 0.110951f; | |
| 280 #else | |
| 281 const float kOutputReference[] = {0.000336f, 0.000244f, 0.000610f}; | |
| 282 const float kNoiseEstimateReference[] = {7.272960f, 8.785113f, 8.383295f}; | |
| 283 const float kSpeechProbabilityReference = -4.000000f; | |
| 284 #endif | |
| 285 | |
| 286 RunBitexactnessTest(16000, 1, 1000, NoiseSuppression::Level::kVeryHigh, | |
| 287 kSpeechProbabilityReference, | |
| 288 rtc::ArrayView<const float>(kNoiseEstimateReference), | |
| 289 rtc::ArrayView<const float>(kOutputReference)); | |
| 290 } | |
| 291 | |
| 292 } // namespace webrtc | |
| OLD | NEW |