webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Resync Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 12 matching lines...) Expand all Loading...
23 #include <numeric>	23 #include <numeric>

24	24

25 #include "webrtc/base/checks.h"	25 #include "webrtc/base/checks.h"

26 #include "webrtc/common_audio/include/audio_util.h"	26 #include "webrtc/common_audio/include/audio_util.h"

27 #include "webrtc/common_audio/window_generator.h"	27 #include "webrtc/common_audio/window_generator.h"

28	28

29 namespace webrtc {	29 namespace webrtc {

30	30

31 namespace {	31 namespace {

32	32

33 const int kErbResolution = 2;	33 const size_t kErbResolution = 2;

34 const int kWindowSizeMs = 2;	34 const int kWindowSizeMs = 2;

35 const int kChunkSizeMs = 10; // Size provided by APM.	35 const int kChunkSizeMs = 10; // Size provided by APM.

36 const float kClipFreq = 200.0f;	36 const float kClipFreq = 200.0f;

37 const float kConfigRho = 0.02f; // Default production and interpretation SNR.	37 const float kConfigRho = 0.02f; // Default production and interpretation SNR.

38 const float kKbdAlpha = 1.5f;	38 const float kKbdAlpha = 1.5f;

39 const float kLambdaBot = -1.0f; // Extreme values in bisection	39 const float kLambdaBot = -1.0f; // Extreme values in bisection

40 const float kLambdaTop = -10e-18f; // search for lambda.	40 const float kLambdaTop = -10e-18f; // search for lambda.

41	41

42 } // namespace	42 } // namespace

43	43

44 using std::complex;	44 using std::complex;

45 using std::max;	45 using std::max;

46 using std::min;	46 using std::min;

47 using VarianceType = intelligibility::VarianceArray::StepType;	47 using VarianceType = intelligibility::VarianceArray::StepType;

48	48

49 IntelligibilityEnhancer::TransformCallback::TransformCallback(	49 IntelligibilityEnhancer::TransformCallback::TransformCallback(

50 IntelligibilityEnhancer* parent,	50 IntelligibilityEnhancer* parent,

51 IntelligibilityEnhancer::AudioSource source)	51 IntelligibilityEnhancer::AudioSource source)

52 : parent_(parent), source_(source) {	52 : parent_(parent), source_(source) {

53 }	53 }

54	54

55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(	55 void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(

56 const complex<float>* const* in_block,	56 const complex<float>* const* in_block,

57 int in_channels,	57 int in_channels,

58 int frames,	58 size_t frames,

59 int /* out_channels */,	59 int /* out_channels */,

60 complex<float>* const* out_block) {	60 complex<float>* const* out_block) {

61 DCHECK_EQ(parent_->freqs_, frames);	61 DCHECK_EQ(parent_->freqs_, frames);

62 for (int i = 0; i < in_channels; ++i) {	62 for (int i = 0; i < in_channels; ++i) {

63 parent_->DispatchAudio(source_, in_block[i], out_block[i]);	63 parent_->DispatchAudio(source_, in_block[i], out_block[i]);

64 }	64 }

65 }	65 }

66	66

67 IntelligibilityEnhancer::IntelligibilityEnhancer()	67 IntelligibilityEnhancer::IntelligibilityEnhancer()

68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {	68 : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {

69 }	69 }

70	70

71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)	71 IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)

72 : freqs_(RealFourier::ComplexLength(	72 : freqs_(RealFourier::ComplexLength(

73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),	73 RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),

74 window_size_(1 << RealFourier::FftOrder(freqs_)),	74 window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),

75 chunk_length_(config.sample_rate_hz * kChunkSizeMs / 1000),	75 chunk_length_(

	76 static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),

76 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),	77 bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),

77 sample_rate_hz_(config.sample_rate_hz),	78 sample_rate_hz_(config.sample_rate_hz),

78 erb_resolution_(kErbResolution),	79 erb_resolution_(kErbResolution),

79 num_capture_channels_(config.num_capture_channels),	80 num_capture_channels_(config.num_capture_channels),

80 num_render_channels_(config.num_render_channels),	81 num_render_channels_(config.num_render_channels),

81 analysis_rate_(config.analysis_rate),	82 analysis_rate_(config.analysis_rate),

82 active_(true),	83 active_(true),

83 clear_variance_(freqs_,	84 clear_variance_(freqs_,

84 config.var_type,	85 config.var_type,

85 config.var_window_size,	86 config.var_window_size,

(...skipping 14 matching lines...) Expand all Loading...
100 kbd_window_(new float[window_size_]),	101 kbd_window_(new float[window_size_]),

101 render_callback_(this, AudioSource::kRenderStream),	102 render_callback_(this, AudioSource::kRenderStream),

102 capture_callback_(this, AudioSource::kCaptureStream),	103 capture_callback_(this, AudioSource::kCaptureStream),

103 block_count_(0),	104 block_count_(0),

104 analysis_step_(0) {	105 analysis_step_(0) {

105 DCHECK_LE(config.rho, 1.0f);	106 DCHECK_LE(config.rho, 1.0f);

106	107

107 CreateErbBank();	108 CreateErbBank();

108	109

109 // Assumes all rho equal.	110 // Assumes all rho equal.

110 for (int i = 0; i < bank_size_; ++i) {	111 for (size_t i = 0; i < bank_size_; ++i) {

111 rho_[i] = config.rho * config.rho;	112 rho_[i] = config.rho * config.rho;

112 }	113 }

113	114

114 float freqs_khz = kClipFreq / 1000.0f;	115 float freqs_khz = kClipFreq / 1000.0f;

115 int erb_index = static_cast<int>(ceilf(	116 size_t erb_index = static_cast<size_t>(ceilf(

116 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));	117 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));

117 start_freq_ = max(1, erb_index * erb_resolution_);	118 start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);

118	119

119 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,	120 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,

120 kbd_window_.get());	121 kbd_window_.get());

121 render_mangler_.reset(new LappedTransform(	122 render_mangler_.reset(new LappedTransform(

122 num_render_channels_, num_render_channels_, chunk_length_,	123 num_render_channels_, num_render_channels_, chunk_length_,

123 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));	124 kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));

124 capture_mangler_.reset(new LappedTransform(	125 capture_mangler_.reset(new LappedTransform(

125 num_capture_channels_, num_capture_channels_, chunk_length_,	126 num_capture_channels_, num_capture_channels_, chunk_length_,

126 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));	127 kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));

127 }	128 }

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
231 lambda_top = lambda;	232 lambda_top = lambda;

232 }	233 }

233 power_ratio = std::fabs(power * reciprocal_power_target);	234 power_ratio = std::fabs(power * reciprocal_power_target);

234 ++iters;	235 ++iters;

235 }	236 }

236 }	237 }

237	238

238 void IntelligibilityEnhancer::UpdateErbGains() {	239 void IntelligibilityEnhancer::UpdateErbGains() {

239 // (ERB gain) = filterbank' * (freq gain)	240 // (ERB gain) = filterbank' * (freq gain)

240 float* gains = gain_applier_.target();	241 float* gains = gain_applier_.target();

241 for (int i = 0; i < freqs_; ++i) {	242 for (size_t i = 0; i < freqs_; ++i) {

242 gains[i] = 0.0f;	243 gains[i] = 0.0f;

243 for (int j = 0; j < bank_size_; ++j) {	244 for (size_t j = 0; j < bank_size_; ++j) {

244 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);	245 gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);

245 }	246 }

246 }	247 }

247 }	248 }

248	249

249 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,	250 void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,

250 complex<float>* /*out_block*/) {	251 complex<float>* /*out_block*/) {

251 noise_variance_.Step(in_block);	252 noise_variance_.Step(in_block);

252 }	253 }

253	254

254 int IntelligibilityEnhancer::GetBankSize(int sample_rate, int erb_resolution) {	255 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,

	256 size_t erb_resolution) {

255 float freq_limit = sample_rate / 2000.0f;	257 float freq_limit = sample_rate / 2000.0f;

256 int erb_scale = ceilf(	258 size_t erb_scale = static_cast<size_t>(ceilf(

257 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f);	259 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));

258 return erb_scale * erb_resolution;	260 return erb_scale * erb_resolution;

259 }	261 }

260	262

261 void IntelligibilityEnhancer::CreateErbBank() {	263 void IntelligibilityEnhancer::CreateErbBank() {

262 int lf = 1, rf = 4;	264 size_t lf = 1, rf = 4;

263	265

264 for (int i = 0; i < bank_size_; ++i) {	266 for (size_t i = 0; i < bank_size_; ++i) {

265 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));	267 float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));

266 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));	268 center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));

267 center_freqs_[i] -= 14678.49f;	269 center_freqs_[i] -= 14678.49f;

268 }	270 }

269 float last_center_freq = center_freqs_[bank_size_ - 1];	271 float last_center_freq = center_freqs_[bank_size_ - 1];

270 for (int i = 0; i < bank_size_; ++i) {	272 for (size_t i = 0; i < bank_size_; ++i) {

271 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;	273 center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;

272 }	274 }

273	275

274 for (int i = 0; i < bank_size_; ++i) {	276 for (size_t i = 0; i < bank_size_; ++i) {

275 filter_bank_[i].resize(freqs_);	277 filter_bank_[i].resize(freqs_);

276 }	278 }

277	279

278 for (int i = 1; i <= bank_size_; ++i) {	280 for (size_t i = 1; i <= bank_size_; ++i) {

279 int lll, ll, rr, rrr;	281 size_t lll, ll, rr, rrr;

280 lll = round(center_freqs_[max(1, i - lf) - 1] * freqs_ /	282 static const size_t kOne = 1; // Avoids repeated static_cast<>s below.

281 (0.5f * sample_rate_hz_));	283 lll = static_cast<size_t>(round(

282 ll =	284 center_freqs_[max(kOne, i - lf) - 1] * freqs_ /

283 round(center_freqs_[max(1, i) - 1] * freqs_ / (0.5f * sample_rate_hz_));	285 (0.5f * sample_rate_hz_)));

284 lll = min(freqs_, max(lll, 1)) - 1;	286 ll = static_cast<size_t>(round(

285 ll = min(freqs_, max(ll, 1)) - 1;	287 center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));

	288 lll = min(freqs_, max(lll, kOne)) - 1;

	289 ll = min(freqs_, max(ll, kOne)) - 1;

286	290

287 rrr = round(center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /	291 rrr = static_cast<size_t>(round(

288 (0.5f * sample_rate_hz_));	292 center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /

289 rr = round(center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /	293 (0.5f * sample_rate_hz_)));

290 (0.5f * sample_rate_hz_));	294 rr = static_cast<size_t>(round(

291 rrr = min(freqs_, max(rrr, 1)) - 1;	295 center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /

292 rr = min(freqs_, max(rr, 1)) - 1;	296 (0.5f * sample_rate_hz_)));

	297 rrr = min(freqs_, max(rrr, kOne)) - 1;

	298 rr = min(freqs_, max(rr, kOne)) - 1;

293	299

294 float step, element;	300 float step, element;

295	301

296 step = 1.0f / (ll - lll);	302 step = 1.0f / (ll - lll);

297 element = 0.0f;	303 element = 0.0f;

298 for (int j = lll; j <= ll; ++j) {	304 for (size_t j = lll; j <= ll; ++j) {

299 filter_bank_[i - 1][j] = element;	305 filter_bank_[i - 1][j] = element;

300 element += step;	306 element += step;

301 }	307 }

302 step = 1.0f / (rrr - rr);	308 step = 1.0f / (rrr - rr);

303 element = 1.0f;	309 element = 1.0f;

304 for (int j = rr; j <= rrr; ++j) {	310 for (size_t j = rr; j <= rrr; ++j) {

305 filter_bank_[i - 1][j] = element;	311 filter_bank_[i - 1][j] = element;

306 element -= step;	312 element -= step;

307 }	313 }

308 for (int j = ll; j <= rr; ++j) {	314 for (size_t j = ll; j <= rr; ++j) {

309 filter_bank_[i - 1][j] = 1.0f;	315 filter_bank_[i - 1][j] = 1.0f;

310 }	316 }

311 }	317 }

312	318

313 float sum;	319 float sum;

314 for (int i = 0; i < freqs_; ++i) {	320 for (size_t i = 0; i < freqs_; ++i) {

315 sum = 0.0f;	321 sum = 0.0f;

316 for (int j = 0; j < bank_size_; ++j) {	322 for (size_t j = 0; j < bank_size_; ++j) {

317 sum += filter_bank_[j][i];	323 sum += filter_bank_[j][i];

318 }	324 }

319 for (int j = 0; j < bank_size_; ++j) {	325 for (size_t j = 0; j < bank_size_; ++j) {

320 filter_bank_[j][i] /= sum;	326 filter_bank_[j][i] /= sum;

321 }	327 }

322 }	328 }

323 }	329 }

324	330

325 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,	331 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,

326 int start_freq,	332 size_t start_freq,

327 float* sols) {	333 float* sols) {

328 bool quadratic = (kConfigRho < 1.0f);	334 bool quadratic = (kConfigRho < 1.0f);

329 const float* var_x0 = filtered_clear_var_.get();	335 const float* var_x0 = filtered_clear_var_.get();

330 const float* var_n0 = filtered_noise_var_.get();	336 const float* var_n0 = filtered_noise_var_.get();

331	337

332 for (int n = 0; n < start_freq; ++n) {	338 for (size_t n = 0; n < start_freq; ++n) {

333 sols[n] = 1.0f;	339 sols[n] = 1.0f;

334 }	340 }

335	341

336 // Analytic solution for optimal gains. See paper for derivation.	342 // Analytic solution for optimal gains. See paper for derivation.

337 for (int n = start_freq - 1; n < bank_size_; ++n) {	343 for (size_t n = start_freq - 1; n < bank_size_; ++n) {

338 float alpha0, beta0, gamma0;	344 float alpha0, beta0, gamma0;

339 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +	345 gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +

340 lambda * var_x0[n] * var_n0[n] * var_n0[n];	346 lambda * var_x0[n] * var_n0[n] * var_n0[n];

341 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];	347 beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];

342 if (quadratic) {	348 if (quadratic) {

343 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];	349 alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];

344 sols[n] =	350 sols[n] =

345 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);	351 (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);

346 } else {	352 } else {

347 sols[n] = -gamma0 / beta0;	353 sols[n] = -gamma0 / beta0;

348 }	354 }

349 sols[n] = fmax(0, sols[n]);	355 sols[n] = fmax(0, sols[n]);

350 }	356 }

351 }	357 }

352	358

353 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {	359 void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {

354 DCHECK_GT(freqs_, 0);	360 DCHECK_GT(freqs_, 0u);

355 for (int i = 0; i < bank_size_; ++i) {	361 for (size_t i = 0; i < bank_size_; ++i) {

356 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);	362 result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);

357 }	363 }

358 }	364 }

359	365

360 float IntelligibilityEnhancer::DotProduct(const float* a,	366 float IntelligibilityEnhancer::DotProduct(const float* a,

361 const float* b,	367 const float* b,

362 int length) {	368 size_t length) {

363 float ret = 0.0f;	369 float ret = 0.0f;

364	370

365 for (int i = 0; i < length; ++i) {	371 for (size_t i = 0; i < length; ++i) {

366 ret = fmaf(a[i], b[i], ret);	372 ret = fmaf(a[i], b[i], ret);

367 }	373 }

368 return ret;	374 return ret;

369 }	375 }

370	376

371 bool IntelligibilityEnhancer::active() const {	377 bool IntelligibilityEnhancer::active() const {

372 return active_;	378 return active_;

373 }	379 }

374	380

375 } // namespace webrtc	381 } // namespace webrtc

OLD	NEW