webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc - Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc

Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@pow

Patch Set: Make gain change limit relative Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Implements helper functions and classes for intelligibility enhancement.

13 //

14

15 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h"	11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h"

16	12

17 #include <math.h>	13 #include <math.h>

18 #include <stdlib.h>	14 #include <stdlib.h>

19 #include <string.h>	15 #include <string.h>

20 #include <algorithm>	16 #include <algorithm>

21	17 #include <limits>

22 using std::complex;

23 using std::min;

24	18

25 namespace webrtc {	19 namespace webrtc {

26	20

27 namespace intelligibility {	21 namespace intelligibility {

28	22

	23 namespace {

	24

	25 // Return \|current\| changed towards \|target\|, with the relative change being at

	26 // most \|limit\|.

29 float UpdateFactor(float target, float current, float limit) {	27 float UpdateFactor(float target, float current, float limit) {

30 float delta = fabsf(target - current);	28 float gain = target / (current + std::numeric_limits<float>::epsilon());

31 float sign = copysign(1.0f, target - current);	29 if (gain < 1.f - limit) {

32 return current + sign * fminf(delta, limit);	30 gain = 1.f - limit;

	31 } else if (gain > 1.f + limit) {

	32 gain = 1.f + limit;

	33 }

	34 return current * gain;
	turaj 2016/02/19 16:48:47 I'm not sure if \|current\| could ever be zero, but I'm not sure if \|current\| could ever be zero, but if it is then this way of update will never recover it, how about just changing the last line of the previous implementation to return current + sign * fminf(delta, fmaxf(limit * current, epsilon())) aluebs-webrtc 2016/02/19 19:30:48 \|current\| should never be zero, since it starts in Show quoted text On 2016/02/19 16:48:47, turaj wrote: > I'm not sure if \|current\| could ever be zero, but if it is then this way of > update will never recover it, how about just changing the last line of the > previous implementation to > > return current + sign * fminf(delta, fmaxf(limit * current, epsilon())) \|current\| should never be zero, since it starts in 1.f and gets multiplied by gains between (1.f - limit) and (1.f + limit), but I see how it could get really close. What I fear from your suggestion is that it could return negative numbers when \|target\| is zero and \|current\| is smaller than epsilon. I added epsilon to the return value to my proposal to make sure current never becomes zero. WDYT?
33 }	35 }

34	36

35 float AddDitherIfZero(float value) {	37 } // namespace

36 return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;

37 }

38	38

39 complex<float> zerofudge(complex<float> c) {	39 PowerEstimator::PowerEstimator(size_t num_freqs, float decay)

40 return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag()));	40 : power_(num_freqs, 0.f), decay_(decay) {}

41 }

42	41

43 complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) {	42 GainApplier::GainApplier(size_t freqs, float relative_change_limit)

44 return mean + (data - mean) / static_cast<float>(count);	43 : num_freqs_(freqs),

45 }	44 relative_change_limit_(relative_change_limit),

46	45 target_(new float[freqs]()),

47 void AddToMean(complex<float> data, size_t count, complex<float>* mean) {	46 current_(new float[freqs]()) {

48 (mean) = NewMean(mean, data, count);	47 for (size_t i = 0; i < freqs; ++i) {

49 }	48 target_[i] = 1.f;

50	49 current_[i] = 1.f;

51

52 static const size_t kWindowBlockSize = 10;

53

54 VarianceArray::VarianceArray(size_t num_freqs,

55 StepType type,

56 size_t window_size,

57 float decay)

58 : running_mean_(new complex<float>[num_freqs]()),

59 running_mean_sq_(new complex<float>[num_freqs]()),

60 sub_running_mean_(new complex<float>[num_freqs]()),

61 sub_running_mean_sq_(new complex<float>[num_freqs]()),

62 variance_(new float[num_freqs]()),

63 conj_sum_(new float[num_freqs]()),

64 num_freqs_(num_freqs),

65 window_size_(window_size),

66 decay_(decay),

67 history_cursor_(0),

68 count_(0),

69 array_mean_(0.0f),

70 buffer_full_(false) {

71 history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

72 for (size_t i = 0; i < num_freqs_; ++i) {

73 history_[i].reset(new complex<float>[window_size_]());

74 }

75 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

76 for (size_t i = 0; i < num_freqs_; ++i) {

77 subhistory_[i].reset(new complex<float>[window_size_]());

78 }

79 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());

80 for (size_t i = 0; i < num_freqs_; ++i) {

81 subhistory_sq_[i].reset(new complex<float>[window_size_]());

82 }

83 switch (type) {

84 case kStepInfinite:

85 step_func_ = &VarianceArray::InfiniteStep;

86 break;

87 case kStepDecaying:

88 step_func_ = &VarianceArray::DecayStep;

89 break;

90 case kStepWindowed:

91 step_func_ = &VarianceArray::WindowedStep;

92 break;

93 case kStepBlocked:

94 step_func_ = &VarianceArray::BlockedStep;

95 break;

96 case kStepBlockBasedMovingAverage:

97 step_func_ = &VarianceArray::BlockBasedMovingAverage;

98 break;

99 }	50 }

100 }	51 }

101	52

102 // Compute the variance with Welford's algorithm, adding some fudge to	53 void GainApplier::Apply(const std::complex<float>* in_block,

103 // the input in case of all-zeroes.	54 std::complex<float>* out_block) {

104 void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {

105 array_mean_ = 0.0f;

106 ++count_;

107 for (size_t i = 0; i < num_freqs_; ++i) {	55 for (size_t i = 0; i < num_freqs_; ++i) {

108 complex<float> sample = data[i];	56 current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);

109 if (!skip_fudge) {	57 out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];

110 sample = zerofudge(sample);

111 }

112 if (count_ == 1) {

113 running_mean_[i] = sample;

114 variance_[i] = 0.0f;

115 } else {

116 float old_sum = conj_sum_[i];

117 complex<float> old_mean = running_mean_[i];

118 running_mean_[i] =

119 old_mean + (sample - old_mean) / static_cast<float>(count_);

120 conj_sum_[i] =

121 (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))

122 .real();

123 variance_[i] =

124 conj_sum_[i] / (count_ - 1);

125 }

126 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

127 }	58 }

128 }	59 }

129	60

130 // Compute the variance from the beginning, with exponential decaying of the

131 // series data.

132 void VarianceArray::DecayStep(const complex<float>* data, bool /dummy/) {

133 array_mean_ = 0.0f;

134 ++count_;

135 for (size_t i = 0; i < num_freqs_; ++i) {

136 complex<float> sample = data[i];

137 sample = zerofudge(sample);

138

139 if (count_ == 1) {

140 running_mean_[i] = sample;

141 running_mean_sq_[i] = sample * std::conj(sample);

142 variance_[i] = 0.0f;

143 } else {

144 complex<float> prev = running_mean_[i];

145 complex<float> prev2 = running_mean_sq_[i];

146 running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;

147 running_mean_sq_[i] =

148 decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);

149 variance_[i] = (running_mean_sq_[i] -

150 running_mean_[i] * std::conj(running_mean_[i])).real();

151 }

152

153 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

154 }

155 }

156

157 // Windowed variance computation. On each step, the variances for the

158 // window are recomputed from scratch, using Welford's algorithm.

159 void VarianceArray::WindowedStep(const complex<float>* data, bool /dummy/) {

160 size_t num = min(count_ + 1, window_size_);

161 array_mean_ = 0.0f;

162 for (size_t i = 0; i < num_freqs_; ++i) {

163 complex<float> mean;

164 float conj_sum = 0.0f;

165

166 history_[i][history_cursor_] = data[i];

167

168 mean = history_[i][history_cursor_];

169 variance_[i] = 0.0f;

170 for (size_t j = 1; j < num; ++j) {

171 complex<float> sample =

172 zerofudge(history_[i][(history_cursor_ + j) % window_size_]);

173 sample = history_[i][(history_cursor_ + j) % window_size_];

174 float old_sum = conj_sum;

175 complex<float> old_mean = mean;

176

177 mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);

178 conj_sum =

179 (old_sum + std::conj(sample - old_mean) * (sample - mean)).real();

180 variance_[i] = conj_sum / (j);

181 }

182 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

183 }

184 history_cursor_ = (history_cursor_ + 1) % window_size_;

185 ++count_;

186 }

187

188 // Variance with a window of blocks. Within each block, the variances are

189 // recomputed from scratch at every stp, using \|Var(X) = E(X^2) - E^2(X)\|.

190 // Once a block is filled with kWindowBlockSize samples, it is added to the

191 // history window and a new block is started. The variances for the window

192 // are recomputed from scratch at each of these transitions.

193 void VarianceArray::BlockedStep(const complex<float>* data, bool /dummy/) {

194 size_t blocks = min(window_size_, history_cursor_ + 1);

195 for (size_t i = 0; i < num_freqs_; ++i) {

196 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);

197 AddToMean(data[i] * std::conj(data[i]), count_ + 1,

198 &sub_running_mean_sq_[i]);

199 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];

200 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];

201

202 variance_[i] =

203 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -

204 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *

205 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))

206 .real();

207 if (count_ == kWindowBlockSize - 1) {

208 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);

209 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

210 running_mean_[i] = complex<float>(0.0f, 0.0f);

211 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);

212 for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {

213 AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);

214 AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);

215 }

216 ++history_cursor_;

217 }

218 }

219 ++count_;

220 if (count_ == kWindowBlockSize) {

221 count_ = 0;

222 }

223 }

224

225 // Recomputes variances for each window from scratch based on previous window.

226 void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,

227 bool /dummy/) {

228 // TODO(ekmeyerson) To mitigate potential divergence, add counter so that

229 // after every so often sums are computed scratch by summing over all

230 // elements instead of subtracting oldest and adding newest.

231 for (size_t i = 0; i < num_freqs_; ++i) {

232 sub_running_mean_[i] += data[i];

233 sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);

234 }

235 ++count_;

236

237 // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow

238 // experimentation with different block size,window size pairs.

239 if (count_ >= kWindowBlockSize) {

240 count_ = 0;

241

242 for (size_t i = 0; i < num_freqs_; ++i) {

243 running_mean_[i] -= subhistory_[i][history_cursor_];

244 running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];

245

246 float scale = 1.f / kWindowBlockSize;

247 subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;

248 subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;

249

250 sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);

251 sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);

252

253 running_mean_[i] += subhistory_[i][history_cursor_];

254 running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];

255

256 scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);

257 variance_[i] = std::real(running_mean_sq_[i] * scale -

258 running_mean_[i] * scale *

259 std::conj(running_mean_[i]) * scale);

260 }

261

262 ++history_cursor_;

263 if (history_cursor_ >= window_size_) {

264 buffer_full_ = true;

265 history_cursor_ = 0;

266 }

267 }

268 }

269

270 void VarianceArray::Clear() {

271 memset(running_mean_.get(), 0, sizeof(running_mean_.get()) num_freqs_);

272 memset(running_mean_sq_.get(), 0,

273 sizeof(running_mean_sq_.get()) num_freqs_);

274 memset(variance_.get(), 0, sizeof(variance_.get()) num_freqs_);

275 memset(conj_sum_.get(), 0, sizeof(conj_sum_.get()) num_freqs_);

276 history_cursor_ = 0;

277 count_ = 0;

278 array_mean_ = 0.0f;

279 }

280

281 void VarianceArray::ApplyScale(float scale) {

282 array_mean_ = 0.0f;

283 for (size_t i = 0; i < num_freqs_; ++i) {

284 variance_[i] = scale scale;

285 array_mean_ += (variance_[i] - array_mean_) / (i + 1);

286 }

287 }

288

289 GainApplier::GainApplier(size_t freqs, float change_limit)

290 : num_freqs_(freqs),

291 change_limit_(change_limit),

292 target_(new float[freqs]()),

293 current_(new float[freqs]()) {

294 for (size_t i = 0; i < freqs; ++i) {

295 target_[i] = 1.0f;

296 current_[i] = 1.0f;

297 }

298 }

299

300 void GainApplier::Apply(const complex<float>* in_block,

301 complex<float>* out_block) {

302 for (size_t i = 0; i < num_freqs_; ++i) {

303 float factor = sqrtf(fabsf(current_[i]));

304 if (!std::isnormal(factor)) {

305 factor = 1.0f;

306 }

307 out_block[i] = factor * in_block[i];

308 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);

309 }

310 }

311

312 } // namespace intelligibility	61 } // namespace intelligibility

313	62

314 } // namespace webrtc	63 } // namespace webrtc

OLD	NEW