webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc - Issue 1207353002: Add new variance update option and unittests for intelligibility

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc

Issue 1207353002: Add new variance update option and unittests for intelligibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Merge Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //	11 //

12 // Implements helper functions and classes for intelligibility enhancement.	12 // Implements helper functions and classes for intelligibility enhancement.

13 //	13 //

14	14

15 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	15 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

16	16

	17 #include <math.h>

	18 #include <string.h>

17 #include <algorithm>	19 #include <algorithm>

18 #include <cmath>

19 #include <cstring>

20	20

21 using std::complex;	21 using std::complex;

	22 using std::min;

22	23

23 namespace {	24 namespace webrtc {

24	25

25 // Return |current| changed towards |target|, with the change being at most	26 namespace intelligibility {

26 // |limit|.	27

27 inline float UpdateFactor(float target, float current, float limit) {	28 float UpdateFactor(float target, float current, float limit) {

28 float delta = fabsf(target - current);	29 float delta = fabsf(target - current);

29 float sign = copysign(1.0f, target - current);	30 float sign = copysign(1.0f, target - current);

30 return current + sign * fminf(delta, limit);	31 return current + sign * fminf(delta, limit);

31 }	32 }

32	33

33 // std::isfinite for complex numbers.	34 bool cplxfinite(complex<float> c) {

34 inline bool cplxfinite(complex<float> c) {

35 return std::isfinite(c.real()) && std::isfinite(c.imag());	35 return std::isfinite(c.real()) && std::isfinite(c.imag());

36 }	36 }

37	37

38 // std::isnormal for complex numbers.	38 bool cplxnormal(complex<float> c) {

39 inline bool cplxnormal(complex<float> c) {

40 return std::isnormal(c.real()) && std::isnormal(c.imag());	39 return std::isnormal(c.real()) && std::isnormal(c.imag());

41 }	40 }

42	41

43 // Apply a small fudge to degenerate complex values. The numbers in the array	42 complex<float> zerofudge(complex<float> c) {

44 // were chosen randomly, so that even a series of all zeroes has some small

45 // variability.

46 inline complex<float> zerofudge(complex<float> c) {

47 const static complex<float> fudge[7] = {{0.001f, 0.002f},	43 const static complex<float> fudge[7] = {{0.001f, 0.002f},

48 {0.008f, 0.001f},	44 {0.008f, 0.001f},

49 {0.003f, 0.008f},	45 {0.003f, 0.008f},

50 {0.0006f, 0.0009f},	46 {0.0006f, 0.0009f},

51 {0.001f, 0.004f},	47 {0.001f, 0.004f},

52 {0.003f, 0.004f},	48 {0.003f, 0.004f},

53 {0.002f, 0.009f}};	49 {0.002f, 0.009f}};

54 static int fudge_index = 0;	50 static int fudge_index = 0;

55 if (cplxfinite(c) && !cplxnormal(c)) {	51 if (cplxfinite(c) && !cplxnormal(c)) {

56 fudge_index = (fudge_index + 1) % 7;	52 fudge_index = (fudge_index + 1) % 7;

57 return c + fudge[fudge_index];	53 return c + fudge[fudge_index];

58 }	54 }

59 return c;	55 return c;

60 }	56 }

61	57

62 // Incremental mean computation. Return the mean of the series with the	58 complex<float> NewMean(complex<float> mean, complex<float> data, int count) {

63 // mean |mean| with added |data|.

64 inline complex<float> NewMean(complex<float> mean,

65 complex<float> data,

66 int count) {

67 return mean + (data - mean) / static_cast<float>(count);	59 return mean + (data - mean) / static_cast<float>(count);

68 }	60 }

69	61

70 inline void AddToMean(complex<float> data, int count, complex<float>* mean) {	62 void AddToMean(complex<float> data, int count, complex<float>* mean) {

71 (*mean) = NewMean(*mean, data, count);	63 (*mean) = NewMean(*mean, data, count);

72 }	64 }

73	65

74 } // namespace

75

76 using std::min;

77

78 namespace webrtc {

79

80 namespace intelligibility {

81	66

82 static const int kWindowBlockSize = 10;	67 static const int kWindowBlockSize = 10;

83	68

84 VarianceArray::VarianceArray(int freqs,	69 VarianceArray::VarianceArray(int freqs,

85 StepType type,	70 StepType type,

86 int window_size,	71 int window_size,

87 float decay)	72 float decay)

88 : running_mean_(new complex<float>[freqs]()),	73 : running_mean_(new complex<float>[freqs]()),

89 running_mean_sq_(new complex<float>[freqs]()),	74 running_mean_sq_(new complex<float>[freqs]()),

90 sub_running_mean_(new complex<float>[freqs]()),	75 sub_running_mean_(new complex<float>[freqs]()),

91 sub_running_mean_sq_(new complex<float>[freqs]()),	76 sub_running_mean_sq_(new complex<float>[freqs]()),

92 variance_(new float[freqs]()),	77 variance_(new float[freqs]()),

93 conj_sum_(new float[freqs]()),	78 conj_sum_(new float[freqs]()),

94 freqs_(freqs),	79 freqs_(freqs),

95 window_size_(window_size),	80 window_size_(window_size),

96 decay_(decay),	81 decay_(decay),

97 history_cursor_(0),	82 history_cursor_(0),

98 count_(0),	83 count_(0),

99 array_mean_(0.0f) {	84 array_mean_(0.0f),

	85 buffer_full_(false) {

100 history_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());	86 history_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());

101 for (int i = 0; i < freqs_; ++i) {	87 for (int i = 0; i < freqs_; ++i) {

102 history_[i].reset(new complex<float>[window_size_]());	88 history_[i].reset(new complex<float>[window_size_]());

103 }	89 }

104 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());	90 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());

105 for (int i = 0; i < freqs_; ++i) {	91 for (int i = 0; i < freqs_; ++i) {

106 subhistory_[i].reset(new complex<float>[window_size_]());	92 subhistory_[i].reset(new complex<float>[window_size_]());

107 }	93 }

108 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());	94 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());

109 for (int i = 0; i < freqs_; ++i) {	95 for (int i = 0; i < freqs_; ++i) {

110 subhistory_sq_[i].reset(new complex<float>[window_size_]());	96 subhistory_sq_[i].reset(new complex<float>[window_size_]());

111 }	97 }

112 switch (type) {	98 switch (type) {

113 case kStepInfinite:	99 case kStepInfinite:

114 step_func_ = &VarianceArray::InfiniteStep;	100 step_func_ = &VarianceArray::InfiniteStep;

115 break;	101 break;

116 case kStepDecaying:	102 case kStepDecaying:

117 step_func_ = &VarianceArray::DecayStep;	103 step_func_ = &VarianceArray::DecayStep;

118 break;	104 break;

119 case kStepWindowed:	105 case kStepWindowed:

120 step_func_ = &VarianceArray::WindowedStep;	106 step_func_ = &VarianceArray::WindowedStep;

121 break;	107 break;

122 case kStepBlocked:	108 case kStepBlocked:

123 step_func_ = &VarianceArray::BlockedStep;	109 step_func_ = &VarianceArray::BlockedStep;

124 break;	110 break;

	111 case kStepBlockBasedMovingAverage:

	112 step_func_ = &VarianceArray::BlockBasedMovingAverage;

	113 break;

125 }	114 }

126 }	115 }

127	116

128 // Compute the variance with Welford's algorithm, adding some fudge to	117 // Compute the variance with Welford's algorithm, adding some fudge to

129 // the input in case of all-zeroes.	118 // the input in case of all-zeroes.

130 void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {	119 void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {

131 array_mean_ = 0.0f;	120 array_mean_ = 0.0f;

132 ++count_;	121 ++count_;

133 for (int i = 0; i < freqs_; ++i) {	122 for (int i = 0; i < freqs_; ++i) {

134 complex<float> sample = data[i];	123 complex<float> sample = data[i];

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
216 history_cursor_ = (history_cursor_ + 1) % window_size_;	205 history_cursor_ = (history_cursor_ + 1) % window_size_;

217 ++count_;	206 ++count_;

218 }	207 }

219	208

220 // Variance with a window of blocks. Within each block, the variances are	209 // Variance with a window of blocks. Within each block, the variances are

221 // recomputed from scratch at every stp, using \|Var(X) = E(X^2) - E^2(X)\|.	210 // recomputed from scratch at every stp, using \|Var(X) = E(X^2) - E^2(X)\|.

222 // Once a block is filled with kWindowBlockSize samples, it is added to the	211 // Once a block is filled with kWindowBlockSize samples, it is added to the

223 // history window and a new block is started. The variances for the window	212 // history window and a new block is started. The variances for the window

224 // are recomputed from scratch at each of these transitions.	213 // are recomputed from scratch at each of these transitions.

225 void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {	214 void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {

226 int blocks = min(window_size_, history_cursor_);	215 int blocks = min(window_size_, history_cursor_ + 1);

227 for (int i = 0; i < freqs_; ++i) {	216 for (int i = 0; i < freqs_; ++i) {

228 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);	217 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);

229 AddToMean(data[i] * std::conj(data[i]), count_ + 1,	218 AddToMean(data[i] * std::conj(data[i]), count_ + 1,

230 &sub_running_mean_sq_[i]);	219 &sub_running_mean_sq_[i]);

231 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];	220 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];

232 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];	221 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];

233	222

234 variance_[i] =	223 variance_[i] =

235 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -	224 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -

236 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *	225 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *

237 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))	226 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))

238 .real();	227 .real();

239 if (count_ == kWindowBlockSize - 1) {	228 if (count_ == kWindowBlockSize - 1) {

240 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);	229 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);

241 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);	230 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

242 running_mean_[i] = complex<float>(0.0f, 0.0f);	231 running_mean_[i] = complex<float>(0.0f, 0.0f);

243 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);	232 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

244 for (int j = 0; j < min(window_size_, history_cursor_); ++j) {	233 for (int j = 0; j < min(window_size_, history_cursor_); ++j) {

245 AddToMean(subhistory_[i][j], j, &running_mean_[i]);	234 AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);

246 AddToMean(subhistory_sq_[i][j], j, &running_mean_sq_[i]);	235 AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);

247 }	236 }

248 ++history_cursor_;	237 ++history_cursor_;

249 }	238 }

250 }	239 }

251 ++count_;	240 ++count_;

252 if (count_ == kWindowBlockSize) {	241 if (count_ == kWindowBlockSize) {

253 count_ = 0;	242 count_ = 0;

254 }	243 }

255 }	244 }

256	245

	246 // Recomputes variances for each window from scratch based on previous window.

	247 void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,

	248 bool /*dummy*/) {

	249 // TODO(ekmeyerson) To mitigate potential divergence, add counter so that

	250 // after every so often sums are computed scratch by summing over all

	251 // elements instead of subtracting oldest and adding newest.

	252 for (int i = 0; i < freqs_; ++i) {

	253 sub_running_mean_[i] += data[i];

	254 sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);

	255 }

	256 ++count_;

	257

	258 // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow

	259 // experimentation with different block size,window size pairs.

	260 if (count_ >= kWindowBlockSize) {

	261 count_ = 0;

	262

	263 for (int i = 0; i < freqs_; ++i) {

	264 running_mean_[i] -= subhistory_[i][history_cursor_];

	265 running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];

	266

	267 float scale = 1.f / kWindowBlockSize;

	268 subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;

	269 subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;

	270

	271 sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);

	272 sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);

	273

	274 running_mean_[i] += subhistory_[i][history_cursor_];

	275 running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];

	276

	277 scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);

	278 variance_[i] = std::real(running_mean_sq_[i] * scale -

	279 running_mean_[i] * scale *

	280 std::conj(running_mean_[i]) * scale);

	281 }

	282

	283 ++history_cursor_;

	284 if (history_cursor_ >= window_size_) {

	285 buffer_full_ = true;

	286 history_cursor_ = 0;

	287 }

	288 }

	289 }

	290

257 void VarianceArray::Clear() {	291 void VarianceArray::Clear() {

258 memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * freqs_);	292 memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * freqs_);

259 memset(running_mean_sq_.get(), 0, sizeof(*running_mean_sq_.get()) * freqs_);	293 memset(running_mean_sq_.get(), 0, sizeof(*running_mean_sq_.get()) * freqs_);

260 memset(variance_.get(), 0, sizeof(*variance_.get()) * freqs_);	294 memset(variance_.get(), 0, sizeof(*variance_.get()) * freqs_);

261 memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * freqs_);	295 memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * freqs_);

262 history_cursor_ = 0;	296 history_cursor_ = 0;

263 count_ = 0;	297 count_ = 0;

264 array_mean_ = 0.0f;	298 array_mean_ = 0.0f;

265 }	299 }

266	300

(...skipping 24 matching lines...) Expand all Loading...
291 factor = 1.0f;	325 factor = 1.0f;

292 }	326 }

293 out_block[i] = factor * in_block[i];	327 out_block[i] = factor * in_block[i];

294 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);	328 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);

295 }	329 }

296 }	330 }

297	331

298 } // namespace intelligibility	332 } // namespace intelligibility

299	333

300 } // namespace webrtc	334 } // namespace webrtc

OLD	NEW