webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc - Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc ('k') | webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
195 const float kTestNonZeroVarLambdaTop[] = {	195 const float kTestNonZeroVarLambdaTop[] = {

196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,	196 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,

197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,	197 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,

198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};	198 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};

199 static_assert(arraysize(kTestCenterFreqs) ==	199 static_assert(arraysize(kTestCenterFreqs) ==

200 arraysize(kTestNonZeroVarLambdaTop),	200 arraysize(kTestNonZeroVarLambdaTop),

201 "Power test data badly initialized.");	201 "Power test data badly initialized.");

202 const float kMaxTestError = 0.005f;	202 const float kMaxTestError = 0.005f;

203	203

204 // Enhancer initialization parameters.	204 // Enhancer initialization parameters.

205 const int kSamples = 1000;	205 const int kSamples = 10000;

206 const int kSampleRate = 4000;	206 const int kSampleRate = 4000;

207 const int kNumChannels = 1;	207 const int kNumChannels = 1;

208 const int kFragmentSize = kSampleRate / 100;	208 const int kFragmentSize = kSampleRate / 100;

209 const size_t kNumNoiseBins = 129;	209 const size_t kNumNoiseBins = 129;

210	210

211 // Number of frames to process in the bitexactness tests.	211 // Number of frames to process in the bitexactness tests.

212 const size_t kNumFramesToProcess = 1000;	212 const size_t kNumFramesToProcess = 1000;

213	213

214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {	214 int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {

215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz	215 return (sample_rate_hz > AudioProcessing::kSampleRate16kHz

216 ? AudioProcessing::kSampleRate16kHz	216 ? AudioProcessing::kSampleRate16kHz

217 : sample_rate_hz);	217 : sample_rate_hz);

218 }	218 }

219	219

220 // Process one frame of data and produce the output.	220 // Process one frame of data and produce the output.

221 void ProcessOneFrame(int sample_rate_hz,	221 void ProcessOneFrame(int sample_rate_hz,

222 AudioBuffer* render_audio_buffer,	222 AudioBuffer* render_audio_buffer,

223 AudioBuffer* capture_audio_buffer,	223 AudioBuffer* capture_audio_buffer,

224 NoiseSuppressionImpl* noise_suppressor,	224 NoiseSuppressionImpl* noise_suppressor,

225 IntelligibilityEnhancer* intelligibility_enhancer) {	225 IntelligibilityEnhancer* intelligibility_enhancer) {

226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {	226 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {

227 render_audio_buffer->SplitIntoFrequencyBands();	227 render_audio_buffer->SplitIntoFrequencyBands();

228 capture_audio_buffer->SplitIntoFrequencyBands();	228 capture_audio_buffer->SplitIntoFrequencyBands();

229 }	229 }

230	230

231 intelligibility_enhancer->ProcessRenderAudio(	231 intelligibility_enhancer->ProcessRenderAudio(

232 render_audio_buffer->split_channels_f(kBand0To8kHz),	232 render_audio_buffer, IntelligibilityEnhancerSampleRate(sample_rate_hz));

233 IntelligibilityEnhancerSampleRate(sample_rate_hz),

234 render_audio_buffer->num_channels());

235	233

236 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);	234 noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);

237 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);	235 noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);

238	236

239 intelligibility_enhancer->SetCaptureNoiseEstimate(	237 intelligibility_enhancer->SetCaptureNoiseEstimate(

240 noise_suppressor->NoiseEstimate(), 0);	238 noise_suppressor->NoiseEstimate(), 0);

241	239

242 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {	240 if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {

243 render_audio_buffer->MergeFrequencyBands();	241 render_audio_buffer->MergeFrequencyBands();

244 }	242 }

(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
313	311

314 float float_rand() {	312 float float_rand() {

315 return std::rand() * 2.f / RAND_MAX - 1;	313 return std::rand() * 2.f / RAND_MAX - 1;

316 }	314 }

317	315

318 } // namespace	316 } // namespace

319	317

320 class IntelligibilityEnhancerTest : public ::testing::Test {	318 class IntelligibilityEnhancerTest : public ::testing::Test {

321 protected:	319 protected:

322 IntelligibilityEnhancerTest()	320 IntelligibilityEnhancerTest()

323 : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {	321 : clear_buffer_(kFragmentSize,

	322 kNumChannels,

	323 kFragmentSize,

	324 kNumChannels,

	325 kFragmentSize),

	326 stream_config_(kSampleRate, kNumChannels),

	327 clear_data_(kSamples),

	328 noise_data_(kNumNoiseBins),

	329 orig_data_(kSamples) {

324 std::srand(1);	330 std::srand(1);

325 enh_.reset(	331 enh_.reset(

326 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));	332 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));

327 }	333 }

328	334

329 bool CheckUpdate() {	335 bool CheckUpdate() {

330 enh_.reset(	336 enh_.reset(

331 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));	337 new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));

332 float* clear_cursor = clear_data_.data();	338 float* clear_cursor = clear_data_.data();

333 float* noise_cursor = noise_data_.data();

334 for (int i = 0; i < kSamples; i += kFragmentSize) {	339 for (int i = 0; i < kSamples; i += kFragmentSize) {

335 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);	340 enh_->SetCaptureNoiseEstimate(noise_data_, 1);

	341 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);

	342 enh_->ProcessRenderAudio(&clear_buffer_, kSampleRate);

	343 clear_buffer_.CopyTo(stream_config_, &clear_cursor);

336 clear_cursor += kFragmentSize;	344 clear_cursor += kFragmentSize;

337 noise_cursor += kFragmentSize;

338 }	345 }

339 for (int i = 0; i < kSamples; i++) {	346 for (int i = 0; i < kSamples; i++) {

340 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {	347 if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {

341 return true;	348 return true;

342 }	349 }

343 }	350 }

344 return false;	351 return false;

345 }	352 }

346	353

347 std::unique_ptr<IntelligibilityEnhancer> enh_;	354 std::unique_ptr<IntelligibilityEnhancer> enh_;

	355 AudioBuffer clear_buffer_;

	356 StreamConfig stream_config_;

348 std::vector<float> clear_data_;	357 std::vector<float> clear_data_;

349 std::vector<float> noise_data_;	358 std::vector<float> noise_data_;

350 std::vector<float> orig_data_;	359 std::vector<float> orig_data_;

351 };	360 };

352	361

353 // For each class of generated data, tests that render stream is updated when	362 // For each class of generated data, tests that render stream is updated when

354 // it should be.	363 // it should be.

355 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {	364 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {

356 std::fill(noise_data_.begin(), noise_data_.end(), 0.f);	365 std::fill(noise_data_.begin(), noise_data_.end(), 0.f);

357 std::fill(orig_data_.begin(), orig_data_.end(), 0.f);	366 std::fill(orig_data_.begin(), orig_data_.end(), 0.f);

358 std::fill(clear_data_.begin(), clear_data_.end(), 0.f);	367 std::fill(clear_data_.begin(), clear_data_.end(), 0.f);

359 EXPECT_FALSE(CheckUpdate());	368 EXPECT_FALSE(CheckUpdate());

360 std::generate(noise_data_.begin(), noise_data_.end(), float_rand);

361 EXPECT_FALSE(CheckUpdate());

362 std::generate(clear_data_.begin(), clear_data_.end(), float_rand);	369 std::generate(clear_data_.begin(), clear_data_.end(), float_rand);

363 orig_data_ = clear_data_;	370 orig_data_ = clear_data_;

	371 EXPECT_FALSE(CheckUpdate());

	372 std::generate(noise_data_.begin(), noise_data_.end(), float_rand);

	373 FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());

364 EXPECT_TRUE(CheckUpdate());	374 EXPECT_TRUE(CheckUpdate());

365 }	375 }

366	376

367 // Tests ERB bank creation, comparing against matlab output.	377 // Tests ERB bank creation, comparing against matlab output.

368 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {	378 TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {

369 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);	379 ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);

370 for (size_t i = 0; i < enh_->bank_size_; ++i) {	380 for (size_t i = 0; i < enh_->bank_size_; ++i) {

371 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);	381 EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);

372 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);	382 ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);

373 for (size_t j = 0; j < enh_->freqs_; ++j) {	383 for (size_t j = 0; j < enh_->freqs_; ++j) {

(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
411 const float kTolerance = 0.007f;	421 const float kTolerance = 0.007f;

412 std::vector<float> noise(kNumNoiseBins);	422 std::vector<float> noise(kNumNoiseBins);

413 std::vector<float> noise_psd(kNumNoiseBins);	423 std::vector<float> noise_psd(kNumNoiseBins);

414 std::generate(noise.begin(), noise.end(), float_rand);	424 std::generate(noise.begin(), noise.end(), float_rand);

415 for (size_t i = 0; i < kNumNoiseBins; ++i) {	425 for (size_t i = 0; i < kNumNoiseBins; ++i) {

416 noise_psd[i] = kGain * kGain * noise[i] * noise[i];	426 noise_psd[i] = kGain * kGain * noise[i] * noise[i];

417 }	427 }

418 float* clear_cursor = clear_data_.data();	428 float* clear_cursor = clear_data_.data();

419 for (size_t i = 0; i < kNumFramesToProcess; ++i) {	429 for (size_t i = 0; i < kNumFramesToProcess; ++i) {

420 enh_->SetCaptureNoiseEstimate(noise, kGain);	430 enh_->SetCaptureNoiseEstimate(noise, kGain);

421 enh_->ProcessRenderAudio(&clear_cursor, kSampleRate, kNumChannels);	431 clear_buffer_.CopyFrom(&clear_cursor, stream_config_);

	432 enh_->ProcessRenderAudio(&clear_buffer_, kSampleRate);

422 }	433 }

423 const std::vector<float>& estimated_psd =	434 const std::vector<float>& estimated_psd =

424 enh_->noise_power_estimator_.power();	435 enh_->noise_power_estimator_.power();

425 for (size_t i = 0; i < kNumNoiseBins; ++i) {	436 for (size_t i = 0; i < kNumNoiseBins; ++i) {

426 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],	437 EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],

427 kTolerance);	438 kTolerance);

428 }	439 }

429 }	440 }

430	441

431 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {	442 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
474 }	485 }

475	486

476 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {	487 TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {

477 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,	488 const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,

478 -0.012975f, -0.015940f, -0.017820f};	489 -0.012975f, -0.015940f, -0.017820f};

479	490

480 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);	491 RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);

481 }	492 }

482	493

483 } // namespace webrtc	494 } // namespace webrtc

OLD	NEW