Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(657)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)
Patch Set: Fix glitches Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 const float kTestNonZeroVarLambdaTop[] = { 195 const float kTestNonZeroVarLambdaTop[] = {
196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; 198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
199 static_assert(arraysize(kTestCenterFreqs) == 199 static_assert(arraysize(kTestCenterFreqs) ==
200 arraysize(kTestNonZeroVarLambdaTop), 200 arraysize(kTestNonZeroVarLambdaTop),
201 "Power test data badly initialized."); 201 "Power test data badly initialized.");
202 const float kMaxTestError = 0.005f; 202 const float kMaxTestError = 0.005f;
203 203
204 // Enhancer initialization parameters. 204 // Enhancer initialization parameters.
205 const int kSamples = 1000; 205 const int kSamples = 10000;
peah-webrtc 2016/09/13 13:30:00 What is the motivation behind the changed number o
aluebs-webrtc 2016/09/14 00:35:55 The number of samples before were not enough for the
206 const int kSampleRate = 4000; 206 const int kSampleRate = 4000;
peah-webrtc 2016/09/13 13:30:00 What is the purpose of the sample rate constant of
aluebs-webrtc 2016/09/14 00:35:54 To set a sample rate for the tests.
peah-webrtc 2016/09/15 15:06:20 You mean that the test is running at a sample rate
aluebs-webrtc 2016/09/15 23:45:25 Other tests (like the bitexactness and the one I ju
peah-webrtc 2016/09/16 13:35:56 It makes sense to test for other sample rates. But
aluebs-webrtc 2016/09/17 00:48:48 Acknowledged.
207 const int kNumChannels = 1; 207 const int kNumChannels = 1;
208 const int kFragmentSize = kSampleRate / 100; 208 const int kFragmentSize = kSampleRate / 100;
209 const size_t kNumNoiseBins = 129; 209 const size_t kNumNoiseBins = 129;
210 const size_t kNumBands = 1;
210 211
211 // Number of frames to process in the bitexactness tests. 212 // Number of frames to process in the bitexactness tests.
212 const size_t kNumFramesToProcess = 1000; 213 const size_t kNumFramesToProcess = 1000;
213 214
214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { 215 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz 216 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz
216 ? AudioProcessing::kSampleRate16kHz 217 ? AudioProcessing::kSampleRate16kHz
217 : sample_rate_hz); 218 : sample_rate_hz);
218 } 219 }
219 220
220 // Process one frame of data and produce the output. 221 // Process one frame of data and produce the output.
221 void ProcessOneFrame(int sample_rate_hz, 222 void ProcessOneFrame(int sample_rate_hz,
222 AudioBuffer* render_audio_buffer, 223 AudioBuffer* render_audio_buffer,
223 AudioBuffer* capture_audio_buffer, 224 AudioBuffer* capture_audio_buffer,
224 NoiseSuppressionImpl* noise_suppressor, 225 NoiseSuppressionImpl* noise_suppressor,
225 IntelligibilityEnhancer* intelligibility_enhancer) { 226 IntelligibilityEnhancer* intelligibility_enhancer) {
226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { 227 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
227 render_audio_buffer->SplitIntoFrequencyBands(); 228 render_audio_buffer->SplitIntoFrequencyBands();
228 capture_audio_buffer->SplitIntoFrequencyBands(); 229 capture_audio_buffer->SplitIntoFrequencyBands();
229 } 230 }
230 231
231 intelligibility_enhancer->ProcessRenderAudio( 232 intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
232 render_audio_buffer->split_channels_f(kBand0To8kHz),
233 IntelligibilityEnhancerSampleRate(sample_rate_hz),
234 render_audio_buffer->num_channels());
235 233
236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); 234 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); 235 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
238 236
239 intelligibility_enhancer->SetCaptureNoiseEstimate( 237 intelligibility_enhancer->SetCaptureNoiseEstimate(
240 noise_suppressor->NoiseEstimate(), 0); 238 noise_suppressor->NoiseEstimate(), 0);
241 239
242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { 240 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
243 render_audio_buffer->MergeFrequencyBands(); 241 render_audio_buffer->MergeFrequencyBands();
244 } 242 }
(...skipping 24 matching lines...) Expand all
269 std::vector<float> capture_input(render_buffer.num_frames() * 267 std::vector<float> capture_input(render_buffer.num_frames() *
270 capture_buffer.num_channels()); 268 capture_buffer.num_channels());
271 269
272 rtc::CriticalSection crit_capture; 270 rtc::CriticalSection crit_capture;
273 NoiseSuppressionImpl noise_suppressor(&crit_capture); 271 NoiseSuppressionImpl noise_suppressor(&crit_capture);
274 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); 272 noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
275 noise_suppressor.Enable(true); 273 noise_suppressor.Enable(true);
276 274
277 IntelligibilityEnhancer intelligibility_enhancer( 275 IntelligibilityEnhancer intelligibility_enhancer(
278 IntelligibilityEnhancerSampleRate(sample_rate_hz), 276 IntelligibilityEnhancerSampleRate(sample_rate_hz),
279 render_config.num_channels(), NoiseSuppressionImpl::num_noise_bins()); 277 render_config.num_channels(), kNumBands,
278 NoiseSuppressionImpl::num_noise_bins());
280 279
281 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { 280 for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
282 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), 281 ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
283 render_buffer.num_channels(), &render_file, 282 render_buffer.num_channels(), &render_file,
284 render_input); 283 render_input);
285 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), 284 ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
286 capture_buffer.num_channels(), &capture_file, 285 capture_buffer.num_channels(), &capture_file,
287 capture_input); 286 capture_input);
288 287
289 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); 288 test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
(...skipping 23 matching lines...) Expand all
313 312
314 float float_rand() { 313 float float_rand() {
315 return std::rand() * 2.f / RAND_MAX - 1; 314 return std::rand() * 2.f / RAND_MAX - 1;
316 } 315 }
317 316
318 } // namespace 317 } // namespace
319 318
320 class IntelligibilityEnhancerTest : public ::testing::Test { 319 class IntelligibilityEnhancerTest : public ::testing::Test {
321 protected: 320 protected:
322 IntelligibilityEnhancerTest() 321 IntelligibilityEnhancerTest()
323 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) { 322 : clear_buffer_(kFragmentSize,
323 kNumChannels,
324 kFragmentSize,
325 kNumChannels,
326 kFragmentSize),
327 stream_config_(kSampleRate, kNumChannels),
328 clear_data_(kSamples),
329 noise_data_(kNumNoiseBins),
330 orig_data_(kSamples) {
324 std::srand(1); 331 std::srand(1);
325 enh_.reset( 332 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
326 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); 333 kNumNoiseBins));
327 } 334 }
328 335
329 bool CheckUpdate() { 336 bool CheckUpdate() {
peah-webrtc 2016/09/13 13:30:00 I'd like a more descriptive name here. This method
aluebs-webrtc 2016/09/14 00:35:54 I think that it is a great idea, but I don't think
peah-webrtc 2016/09/15 15:06:20 Acknowledged.
330 enh_.reset( 337 enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
331 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins)); 338 kNumNoiseBins));
332 float* clear_cursor = clear_data_.data(); 339 float* clear_cursor = clear_data_.data();
peah-webrtc 2016/09/13 13:30:00 The name cursor I think is quite misleading as I c
aluebs-webrtc 2016/09/14 00:35:54 See above about naming.
333 float* noise_cursor = noise_data_.data();
334 for (int i = 0; i < kSamples; i += kFragmentSize) { 340 for (int i = 0; i < kSamples; i += kFragmentSize) {
335 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); 341 enh_->SetCaptureNoiseEstimate(noise_data_, 1);
342 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
343 enh_->ProcessRenderAudio(&clear_buffer_);
344 clear_buffer_.CopyTo(stream_config_, &clear_cursor);
336 clear_cursor += kFragmentSize; 345 clear_cursor += kFragmentSize;
337 noise_cursor += kFragmentSize;
338 } 346 }
339 for (int i = 0; i < kSamples; i++) { 347 for (int i = initial_delay_; i < kSamples; i++) {
340 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) { 348 if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
peah-webrtc 2016/09/13 13:30:00 As far as I can see, this does not verify that the
aluebs-webrtc 2016/09/14 00:35:54 Added test.
349 kMaxTestError) {
341 return true; 350 return true;
342 } 351 }
343 } 352 }
344 return false; 353 return false;
345 } 354 }
346 355
347 std::unique_ptr<IntelligibilityEnhancer> enh_; 356 std::unique_ptr<IntelligibilityEnhancer> enh_;
357 AudioBuffer clear_buffer_;
peah-webrtc 2016/09/13 13:30:00 What is the reason for this name? Please explain,
aluebs-webrtc 2016/09/14 00:35:54 To be consistent with the naming in the test I am
358 StreamConfig stream_config_;
348 std::vector<float> clear_data_; 359 std::vector<float> clear_data_;
349 std::vector<float> noise_data_; 360 std::vector<float> noise_data_;
350 std::vector<float> orig_data_; 361 std::vector<float> orig_data_;
362 size_t initial_delay_;
351 }; 363 };
352 364
353 // For each class of generated data, tests that render stream is updated when 365 // For each class of generated data, tests that render stream is updated when
354 // it should be. 366 // it should be.
355 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { 367 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
368 initial_delay_ = enh_->render_mangler_->initial_delay();
356 std::fill(noise_data_.begin(), noise_data_.end(), 0.f); 369 std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
357 std::fill(orig_data_.begin(), orig_data_.end(), 0.f); 370 std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
358 std::fill(clear_data_.begin(), clear_data_.end(), 0.f); 371 std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
359 EXPECT_FALSE(CheckUpdate()); 372 EXPECT_FALSE(CheckUpdate());
360 std::generate(noise_data_.begin(), noise_data_.end(), float_rand); 373 std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
374 orig_data_ = clear_data_;
361 EXPECT_FALSE(CheckUpdate()); 375 EXPECT_FALSE(CheckUpdate());
362 std::generate(clear_data_.begin(), clear_data_.end(), float_rand); 376 std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
363 orig_data_ = clear_data_; 377 orig_data_ = clear_data_;
378 std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
379 FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
364 EXPECT_TRUE(CheckUpdate()); 380 EXPECT_TRUE(CheckUpdate());
365 } 381 }
366 382
367 // Tests ERB bank creation, comparing against matlab output. 383 // Tests ERB bank creation, comparing against matlab output.
368 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { 384 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
369 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); 385 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
370 for (size_t i = 0; i < enh_->bank_size_; ++i) { 386 for (size_t i = 0; i < enh_->bank_size_; ++i) {
371 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); 387 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
372 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); 388 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
373 for (size_t j = 0; j < enh_->freqs_; ++j) { 389 for (size_t j = 0; j < enh_->freqs_; ++j) {
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
411 const float kTolerance = 0.007f; 427 const float kTolerance = 0.007f;
412 std::vector<float> noise(kNumNoiseBins); 428 std::vector<float> noise(kNumNoiseBins);
413 std::vector<float> noise_psd(kNumNoiseBins); 429 std::vector<float> noise_psd(kNumNoiseBins);
414 std::generate(noise.begin(), noise.end(), float_rand); 430 std::generate(noise.begin(), noise.end(), float_rand);
415 for (size_t i = 0; i < kNumNoiseBins; ++i) { 431 for (size_t i = 0; i < kNumNoiseBins; ++i) {
416 noise_psd[i] = kGain * kGain * noise[i] * noise[i]; 432 noise_psd[i] = kGain * kGain * noise[i] * noise[i];
417 } 433 }
418 float* clear_cursor = clear_data_.data(); 434 float* clear_cursor = clear_data_.data();
419 for (size_t i = 0; i < kNumFramesToProcess; ++i) { 435 for (size_t i = 0; i < kNumFramesToProcess; ++i) {
420 enh_->SetCaptureNoiseEstimate(noise, kGain); 436 enh_->SetCaptureNoiseEstimate(noise, kGain);
421 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels); 437 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
peah-webrtc 2016/09/13 13:30:00 Why don't you update the clear cursor counter here
aluebs-webrtc 2016/09/14 00:35:54 Because it is irrelevant to the Noise PSD estimati
peah-webrtc 2016/09/15 15:06:20 I see, so basically you don't really care about wh
aluebs-webrtc 2016/09/15 23:45:25 Agreed. But it is unrelated to this CL, so we shou
438 enh_->ProcessRenderAudio(&clear_buffer_);
422 } 439 }
423 const std::vector<float>& estimated_psd = 440 const std::vector<float>& estimated_psd =
424 enh_->noise_power_estimator_.power(); 441 enh_->noise_power_estimator_.power();
425 for (size_t i = 0; i < kNumNoiseBins; ++i) { 442 for (size_t i = 0; i < kNumNoiseBins; ++i) {
426 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], 443 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],
427 kTolerance); 444 kTolerance);
428 } 445 }
429 } 446 }
430 447
431 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { 448 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 } 491 }
475 492
476 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { 493 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
477 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, 494 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
478 -0.012975f, -0.015940f, -0.017820f}; 495 -0.012975f, -0.015940f, -0.017820f};
479 496
480 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); 497 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
481 } 498 }
482 499
483 } // namespace webrtc 500 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698