webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc - Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc

Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ie

Patch Set: Naming Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Implements helper functions and classes for intelligibility enhancement.

13 //

14

15 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h"	11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h"

16	12

17 #include <math.h>	13 #include <cmath>

18 #include <stdlib.h>	14 #include <cstdlib>

19 #include <string.h>	15 #include <cstring>

20 #include <algorithm>	16 #include <algorithm>
	turaj 2016/02/12 23:12:35 I guess <algorithm> should come first, now. I guess <algorithm> should come first, now. aluebs-webrtc 2016/02/13 01:47:50 Well the styleguide says C includes should come be Show quoted text On 2016/02/12 23:12:35, turaj wrote: > I guess <algorithm> should come first, now. Well the styleguide says C includes should come before C++ ones, does this also apply when using <c> instead of <.h>. I am not sure. https://engdoc.corp.google.com/eng/doc/devguide/cpp/styleguide.shtml?cl=head#...
21	17

22 using std::complex;

23 using std::min;

24

25 namespace webrtc {	18 namespace webrtc {

26	19

27 namespace intelligibility {	20 namespace {

28	21

	22 // Return \|current\| changed towards \|target\|, with the change being at most

	23 // \|limit\|.

29 float UpdateFactor(float target, float current, float limit) {	24 float UpdateFactor(float target, float current, float limit) {

30 float delta = fabsf(target - current);	25 float delta = fabsf(target - current);

31 float sign = copysign(1.0f, target - current);	26 float sign = copysign(1.0f, target - current);

32 return current + sign * fminf(delta, limit);	27 return current + sign * fminf(delta, limit);

33 }	28 }

34	29

35 float AddDitherIfZero(float value) {	30 } // namespace

36 return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;	31

	32 PowerEstimator::PowerEstimator(size_t num_freqs,

	33 float decay)

	34 : magnitude_(new float[num_freqs]()),

	35 power_(new float[num_freqs]()),

	36 num_freqs_(num_freqs),

	37 decay_(decay) {

	38 memset(magnitude_.get(), 0, sizeof(magnitude_.get()) num_freqs_);

	39 memset(power_.get(), 0, sizeof(power_.get()) num_freqs_);

37 }	40 }

38	41

39 complex<float> zerofudge(complex<float> c) {	42 // Compute the magnitude from the beginning, with exponential decaying of the

40 return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag()));	43 // series data.

41 }	44 void PowerEstimator::Step(const std::complex<float>* data) {

42

43 complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) {

44 return mean + (data - mean) / static_cast<float>(count);

45 }

46

47 void AddToMean(complex<float> data, size_t count, complex<float>* mean) {

48 (mean) = NewMean(mean, data, count);

49 }

50

51

52 static const size_t kWindowBlockSize = 10;

53

54 VarianceArray::VarianceArray(size_t num_freqs,

55 StepType type,

56 size_t window_size,

57 float decay)

58 : running_mean_(new complex<float>[num_freqs]()),

59 running_mean_sq_(new complex<float>[num_freqs]()),

60 sub_running_mean_(new complex<float>[num_freqs]()),

61 sub_running_mean_sq_(new complex<float>[num_freqs]()),

62 variance_(new float[num_freqs]()),

63 conj_sum_(new float[num_freqs]()),

64 num_freqs_(num_freqs),

65 window_size_(window_size),

66 decay_(decay),

67 history_cursor_(0),

68 count_(0),

69 array_mean_(0.0f),

70 buffer_full_(false) {

71 history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

72 for (size_t i = 0; i < num_freqs_; ++i) {	45 for (size_t i = 0; i < num_freqs_; ++i) {

73 history_[i].reset(new complex<float>[window_size_]());	46 magnitude_[i] = decay_ * magnitude_[i] +

74 }	47 (1.0f - decay_) * std::abs(data[i]);

75 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

76 for (size_t i = 0; i < num_freqs_; ++i) {

77 subhistory_[i].reset(new complex<float>[window_size_]());

78 }

79 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

80 for (size_t i = 0; i < num_freqs_; ++i) {

81 subhistory_sq_[i].reset(new complex<float>[window_size_]());

82 }

83 switch (type) {

84 case kStepInfinite:

85 step_func_ = &VarianceArray::InfiniteStep;

86 break;

87 case kStepDecaying:

88 step_func_ = &VarianceArray::DecayStep;

89 break;

90 case kStepWindowed:

91 step_func_ = &VarianceArray::WindowedStep;

92 break;

93 case kStepBlocked:

94 step_func_ = &VarianceArray::BlockedStep;

95 break;

96 case kStepBlockBasedMovingAverage:

97 step_func_ = &VarianceArray::BlockBasedMovingAverage;

98 break;

99 }	48 }

100 }	49 }

101	50

102 // Compute the variance with Welford's algorithm, adding some fudge to	51 const float* PowerEstimator::Power() {

103 // the input in case of all-zeroes.

104 void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {

105 array_mean_ = 0.0f;

106 ++count_;

107 for (size_t i = 0; i < num_freqs_; ++i) {	52 for (size_t i = 0; i < num_freqs_; ++i) {

108 complex<float> sample = data[i];	53 power_[i] = magnitude_[i] * magnitude_[i];

109 if (!skip_fudge) {

110 sample = zerofudge(sample);

111 }

112 if (count_ == 1) {

113 running_mean_[i] = sample;

114 variance_[i] = 0.0f;

115 } else {

116 float old_sum = conj_sum_[i];

117 complex<float> old_mean = running_mean_[i];

118 running_mean_[i] =

119 old_mean + (sample - old_mean) / static_cast<float>(count_);

120 conj_sum_[i] =

121 (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))

122 .real();

123 variance_[i] =

124 conj_sum_[i] / (count_ - 1);

125 }

126 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

127 }	54 }

128 }	55 return &power_[0];

129

130 // Compute the variance from the beginning, with exponential decaying of the

131 // series data.

132 void VarianceArray::DecayStep(const complex<float>* data, bool /dummy/) {

133 array_mean_ = 0.0f;

134 ++count_;

135 for (size_t i = 0; i < num_freqs_; ++i) {

136 complex<float> sample = data[i];

137 sample = zerofudge(sample);

138

139 if (count_ == 1) {

140 running_mean_[i] = sample;

141 running_mean_sq_[i] = sample * std::conj(sample);

142 variance_[i] = 0.0f;

143 } else {

144 complex<float> prev = running_mean_[i];

145 complex<float> prev2 = running_mean_sq_[i];

146 running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;

147 running_mean_sq_[i] =

148 decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);

149 variance_[i] = (running_mean_sq_[i] -

150 running_mean_[i] * std::conj(running_mean_[i])).real();

151 }

152

153 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

154 }

155 }

156

157 // Windowed variance computation. On each step, the variances for the

158 // window are recomputed from scratch, using Welford's algorithm.

159 void VarianceArray::WindowedStep(const complex<float>* data, bool /dummy/) {

160 size_t num = min(count_ + 1, window_size_);

161 array_mean_ = 0.0f;

162 for (size_t i = 0; i < num_freqs_; ++i) {

163 complex<float> mean;

164 float conj_sum = 0.0f;

165

166 history_[i][history_cursor_] = data[i];

167

168 mean = history_[i][history_cursor_];

169 variance_[i] = 0.0f;

170 for (size_t j = 1; j < num; ++j) {

171 complex<float> sample =

172 zerofudge(history_[i][(history_cursor_ + j) % window_size_]);

173 sample = history_[i][(history_cursor_ + j) % window_size_];

174 float old_sum = conj_sum;

175 complex<float> old_mean = mean;

176

177 mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);

178 conj_sum =

179 (old_sum + std::conj(sample - old_mean) * (sample - mean)).real();

180 variance_[i] = conj_sum / (j);

181 }

182 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

183 }

184 history_cursor_ = (history_cursor_ + 1) % window_size_;

185 ++count_;

186 }

187

188 // Variance with a window of blocks. Within each block, the variances are

189 // recomputed from scratch at every stp, using \|Var(X) = E(X^2) - E^2(X)\|.

190 // Once a block is filled with kWindowBlockSize samples, it is added to the

191 // history window and a new block is started. The variances for the window

192 // are recomputed from scratch at each of these transitions.

193 void VarianceArray::BlockedStep(const complex<float>* data, bool /dummy/) {

194 size_t blocks = min(window_size_, history_cursor_ + 1);

195 for (size_t i = 0; i < num_freqs_; ++i) {

196 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);

197 AddToMean(data[i] * std::conj(data[i]), count_ + 1,

198 &sub_running_mean_sq_[i]);

199 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];

200 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];

201

202 variance_[i] =

203 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -

204 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *

205 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))

206 .real();

207 if (count_ == kWindowBlockSize - 1) {

208 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);

209 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

210 running_mean_[i] = complex<float>(0.0f, 0.0f);

211 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

212 for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {

213 AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);

214 AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);

215 }

216 ++history_cursor_;

217 }

218 }

219 ++count_;

220 if (count_ == kWindowBlockSize) {

221 count_ = 0;

222 }

223 }

224

225 // Recomputes variances for each window from scratch based on previous window.

226 void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,

227 bool /dummy/) {

228 // TODO(ekmeyerson) To mitigate potential divergence, add counter so that

229 // after every so often sums are computed scratch by summing over all

230 // elements instead of subtracting oldest and adding newest.

231 for (size_t i = 0; i < num_freqs_; ++i) {

232 sub_running_mean_[i] += data[i];

233 sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);

234 }

235 ++count_;

236

237 // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow

238 // experimentation with different block size,window size pairs.

239 if (count_ >= kWindowBlockSize) {

240 count_ = 0;

241

242 for (size_t i = 0; i < num_freqs_; ++i) {

243 running_mean_[i] -= subhistory_[i][history_cursor_];

244 running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];

245

246 float scale = 1.f / kWindowBlockSize;

247 subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;

248 subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;

249

250 sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);

251 sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);

252

253 running_mean_[i] += subhistory_[i][history_cursor_];

254 running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];

255

256 scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);

257 variance_[i] = std::real(running_mean_sq_[i] * scale -

258 running_mean_[i] * scale *

259 std::conj(running_mean_[i]) * scale);

260 }

261

262 ++history_cursor_;

263 if (history_cursor_ >= window_size_) {

264 buffer_full_ = true;

265 history_cursor_ = 0;

266 }

267 }

268 }

269

270 void VarianceArray::Clear() {

271 memset(running_mean_.get(), 0, sizeof(running_mean_.get()) num_freqs_);

272 memset(running_mean_sq_.get(), 0,

273 sizeof(running_mean_sq_.get()) num_freqs_);

274 memset(variance_.get(), 0, sizeof(variance_.get()) num_freqs_);

275 memset(conj_sum_.get(), 0, sizeof(conj_sum_.get()) num_freqs_);

276 history_cursor_ = 0;

277 count_ = 0;

278 array_mean_ = 0.0f;

279 }

280

281 void VarianceArray::ApplyScale(float scale) {

282 array_mean_ = 0.0f;

283 for (size_t i = 0; i < num_freqs_; ++i) {

284 variance_[i] = scale scale;

285 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

286 }

287 }	56 }

288	57

289 GainApplier::GainApplier(size_t freqs, float change_limit)	58 GainApplier::GainApplier(size_t freqs, float change_limit)

290 : num_freqs_(freqs),	59 : num_freqs_(freqs),

291 change_limit_(change_limit),	60 change_limit_(change_limit),

292 target_(new float[freqs]()),	61 target_(new float[freqs]()),

293 current_(new float[freqs]()) {	62 current_(new float[freqs]()) {

294 for (size_t i = 0; i < freqs; ++i) {	63 for (size_t i = 0; i < freqs; ++i) {

295 target_[i] = 1.0f;	64 target_[i] = 1.0f;

296 current_[i] = 1.0f;	65 current_[i] = 1.0f;

297 }	66 }

298 }	67 }

299	68

300 void GainApplier::Apply(const complex<float>* in_block,	69 void GainApplier::Apply(const std::complex<float>* in_block,

301 complex<float>* out_block) {	70 std::complex<float>* out_block) {

302 for (size_t i = 0; i < num_freqs_; ++i) {	71 for (size_t i = 0; i < num_freqs_; ++i) {

303 float factor = sqrtf(fabsf(current_[i]));	72 float factor = sqrtf(fabsf(current_[i]));

304 if (!std::isnormal(factor)) {	73 if (!std::isnormal(factor)) {

305 factor = 1.0f;	74 factor = 1.f;
	turaj 2016/02/12 23:12:35 Please keep 1.0f for consistency, as in lines 64 a Please keep 1.0f for consistency, as in lines 64 and 65 it is 1.0f aluebs-webrtc 2016/02/13 01:47:49 I changed all occurrences in this file to .f. Show quoted text On 2016/02/12 23:12:35, turaj wrote: > Please keep 1.0f for consistency, as in lines 64 and 65 it is 1.0f I changed all occurrences in this file to .f.
306 }	75 }

307 out_block[i] = factor * in_block[i];	76 out_block[i] = factor * in_block[i];

308 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);	77 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);

309 }	78 }

310 }	79 }

311	80

312 } // namespace intelligibility

313

314 } // namespace webrtc	81 } // namespace webrtc

OLD	NEW