Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc

Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@pow
Patch Set: Make gain change limit relative Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 //
12 // Implements helper functions and classes for intelligibility enhancement.
13 //
14
15 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h" 11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils. h"
16 12
17 #include <math.h> 13 #include <math.h>
18 #include <stdlib.h> 14 #include <stdlib.h>
19 #include <string.h> 15 #include <string.h>
20 #include <algorithm> 16 #include <algorithm>
21 17 #include <limits>
22 using std::complex;
23 using std::min;
24 18
25 namespace webrtc { 19 namespace webrtc {
26 20
27 namespace intelligibility { 21 namespace intelligibility {
28 22
23 namespace {
24
25 // Return |current| changed towards |target|, with the relative change being at
26 // most |limit|.
29 float UpdateFactor(float target, float current, float limit) { 27 float UpdateFactor(float target, float current, float limit) {
30 float delta = fabsf(target - current); 28 float gain = target / (current + std::numeric_limits<float>::epsilon());
31 float sign = copysign(1.0f, target - current); 29 if (gain < 1.f - limit) {
32 return current + sign * fminf(delta, limit); 30 gain = 1.f - limit;
31 } else if (gain > 1.f + limit) {
32 gain = 1.f + limit;
33 }
34 return current * gain;
turaj 2016/02/19 16:48:47 I'm not sure if |current| could ever be zero, but
aluebs-webrtc 2016/02/19 19:30:48 |current| should never be zero, since it starts in
33 } 35 }
34 36
35 float AddDitherIfZero(float value) { 37 } // namespace
36 return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;
37 }
38 38
39 complex<float> zerofudge(complex<float> c) { 39 PowerEstimator::PowerEstimator(size_t num_freqs, float decay)
40 return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag())); 40 : power_(num_freqs, 0.f), decay_(decay) {}
41 }
42 41
43 complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) { 42 GainApplier::GainApplier(size_t freqs, float relative_change_limit)
44 return mean + (data - mean) / static_cast<float>(count); 43 : num_freqs_(freqs),
45 } 44 relative_change_limit_(relative_change_limit),
46 45 target_(new float[freqs]()),
47 void AddToMean(complex<float> data, size_t count, complex<float>* mean) { 46 current_(new float[freqs]()) {
48 (*mean) = NewMean(*mean, data, count); 47 for (size_t i = 0; i < freqs; ++i) {
49 } 48 target_[i] = 1.f;
50 49 current_[i] = 1.f;
51
52 static const size_t kWindowBlockSize = 10;
53
54 VarianceArray::VarianceArray(size_t num_freqs,
55 StepType type,
56 size_t window_size,
57 float decay)
58 : running_mean_(new complex<float>[num_freqs]()),
59 running_mean_sq_(new complex<float>[num_freqs]()),
60 sub_running_mean_(new complex<float>[num_freqs]()),
61 sub_running_mean_sq_(new complex<float>[num_freqs]()),
62 variance_(new float[num_freqs]()),
63 conj_sum_(new float[num_freqs]()),
64 num_freqs_(num_freqs),
65 window_size_(window_size),
66 decay_(decay),
67 history_cursor_(0),
68 count_(0),
69 array_mean_(0.0f),
70 buffer_full_(false) {
71 history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
72 for (size_t i = 0; i < num_freqs_; ++i) {
73 history_[i].reset(new complex<float>[window_size_]());
74 }
75 subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
76 for (size_t i = 0; i < num_freqs_; ++i) {
77 subhistory_[i].reset(new complex<float>[window_size_]());
78 }
79 subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
80 for (size_t i = 0; i < num_freqs_; ++i) {
81 subhistory_sq_[i].reset(new complex<float>[window_size_]());
82 }
83 switch (type) {
84 case kStepInfinite:
85 step_func_ = &VarianceArray::InfiniteStep;
86 break;
87 case kStepDecaying:
88 step_func_ = &VarianceArray::DecayStep;
89 break;
90 case kStepWindowed:
91 step_func_ = &VarianceArray::WindowedStep;
92 break;
93 case kStepBlocked:
94 step_func_ = &VarianceArray::BlockedStep;
95 break;
96 case kStepBlockBasedMovingAverage:
97 step_func_ = &VarianceArray::BlockBasedMovingAverage;
98 break;
99 } 50 }
100 } 51 }
101 52
102 // Compute the variance with Welford's algorithm, adding some fudge to 53 void GainApplier::Apply(const std::complex<float>* in_block,
103 // the input in case of all-zeroes. 54 std::complex<float>* out_block) {
104 void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {
105 array_mean_ = 0.0f;
106 ++count_;
107 for (size_t i = 0; i < num_freqs_; ++i) { 55 for (size_t i = 0; i < num_freqs_; ++i) {
108 complex<float> sample = data[i]; 56 current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);
109 if (!skip_fudge) { 57 out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];
110 sample = zerofudge(sample);
111 }
112 if (count_ == 1) {
113 running_mean_[i] = sample;
114 variance_[i] = 0.0f;
115 } else {
116 float old_sum = conj_sum_[i];
117 complex<float> old_mean = running_mean_[i];
118 running_mean_[i] =
119 old_mean + (sample - old_mean) / static_cast<float>(count_);
120 conj_sum_[i] =
121 (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))
122 .real();
123 variance_[i] =
124 conj_sum_[i] / (count_ - 1);
125 }
126 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
127 } 58 }
128 } 59 }
129 60
130 // Compute the variance from the beginning, with exponential decaying of the
131 // series data.
132 void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
133 array_mean_ = 0.0f;
134 ++count_;
135 for (size_t i = 0; i < num_freqs_; ++i) {
136 complex<float> sample = data[i];
137 sample = zerofudge(sample);
138
139 if (count_ == 1) {
140 running_mean_[i] = sample;
141 running_mean_sq_[i] = sample * std::conj(sample);
142 variance_[i] = 0.0f;
143 } else {
144 complex<float> prev = running_mean_[i];
145 complex<float> prev2 = running_mean_sq_[i];
146 running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;
147 running_mean_sq_[i] =
148 decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);
149 variance_[i] = (running_mean_sq_[i] -
150 running_mean_[i] * std::conj(running_mean_[i])).real();
151 }
152
153 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
154 }
155 }
156
157 // Windowed variance computation. On each step, the variances for the
158 // window are recomputed from scratch, using Welford's algorithm.
159 void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
160 size_t num = min(count_ + 1, window_size_);
161 array_mean_ = 0.0f;
162 for (size_t i = 0; i < num_freqs_; ++i) {
163 complex<float> mean;
164 float conj_sum = 0.0f;
165
166 history_[i][history_cursor_] = data[i];
167
168 mean = history_[i][history_cursor_];
169 variance_[i] = 0.0f;
170 for (size_t j = 1; j < num; ++j) {
171 complex<float> sample =
172 zerofudge(history_[i][(history_cursor_ + j) % window_size_]);
173 sample = history_[i][(history_cursor_ + j) % window_size_];
174 float old_sum = conj_sum;
175 complex<float> old_mean = mean;
176
177 mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);
178 conj_sum =
179 (old_sum + std::conj(sample - old_mean) * (sample - mean)).real();
180 variance_[i] = conj_sum / (j);
181 }
182 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
183 }
184 history_cursor_ = (history_cursor_ + 1) % window_size_;
185 ++count_;
186 }
187
188 // Variance with a window of blocks. Within each block, the variances are
189 // recomputed from scratch at every stp, using |Var(X) = E(X^2) - E^2(X)|.
190 // Once a block is filled with kWindowBlockSize samples, it is added to the
191 // history window and a new block is started. The variances for the window
192 // are recomputed from scratch at each of these transitions.
193 void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
194 size_t blocks = min(window_size_, history_cursor_ + 1);
195 for (size_t i = 0; i < num_freqs_; ++i) {
196 AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
197 AddToMean(data[i] * std::conj(data[i]), count_ + 1,
198 &sub_running_mean_sq_[i]);
199 subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];
200 subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];
201
202 variance_[i] =
203 (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -
204 NewMean(running_mean_[i], sub_running_mean_[i], blocks) *
205 std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))
206 .real();
207 if (count_ == kWindowBlockSize - 1) {
208 sub_running_mean_[i] = complex<float>(0.0f, 0.0f);
209 sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
210 running_mean_[i] = complex<float>(0.0f, 0.0f);
211 running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
212 for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {
213 AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
214 AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
215 }
216 ++history_cursor_;
217 }
218 }
219 ++count_;
220 if (count_ == kWindowBlockSize) {
221 count_ = 0;
222 }
223 }
224
225 // Recomputes variances for each window from scratch based on previous window.
226 void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,
227 bool /*dummy*/) {
228 // TODO(ekmeyerson) To mitigate potential divergence, add counter so that
229 // after every so often sums are computed scratch by summing over all
230 // elements instead of subtracting oldest and adding newest.
231 for (size_t i = 0; i < num_freqs_; ++i) {
232 sub_running_mean_[i] += data[i];
233 sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
234 }
235 ++count_;
236
237 // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow
238 // experimentation with different block size,window size pairs.
239 if (count_ >= kWindowBlockSize) {
240 count_ = 0;
241
242 for (size_t i = 0; i < num_freqs_; ++i) {
243 running_mean_[i] -= subhistory_[i][history_cursor_];
244 running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];
245
246 float scale = 1.f / kWindowBlockSize;
247 subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;
248 subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;
249
250 sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);
251 sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);
252
253 running_mean_[i] += subhistory_[i][history_cursor_];
254 running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];
255
256 scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);
257 variance_[i] = std::real(running_mean_sq_[i] * scale -
258 running_mean_[i] * scale *
259 std::conj(running_mean_[i]) * scale);
260 }
261
262 ++history_cursor_;
263 if (history_cursor_ >= window_size_) {
264 buffer_full_ = true;
265 history_cursor_ = 0;
266 }
267 }
268 }
269
270 void VarianceArray::Clear() {
271 memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_);
272 memset(running_mean_sq_.get(), 0,
273 sizeof(*running_mean_sq_.get()) * num_freqs_);
274 memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_);
275 memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_);
276 history_cursor_ = 0;
277 count_ = 0;
278 array_mean_ = 0.0f;
279 }
280
281 void VarianceArray::ApplyScale(float scale) {
282 array_mean_ = 0.0f;
283 for (size_t i = 0; i < num_freqs_; ++i) {
284 variance_[i] *= scale * scale;
285 array_mean_ += (variance_[i] - array_mean_) / (i + 1);
286 }
287 }
288
289 GainApplier::GainApplier(size_t freqs, float change_limit)
290 : num_freqs_(freqs),
291 change_limit_(change_limit),
292 target_(new float[freqs]()),
293 current_(new float[freqs]()) {
294 for (size_t i = 0; i < freqs; ++i) {
295 target_[i] = 1.0f;
296 current_[i] = 1.0f;
297 }
298 }
299
300 void GainApplier::Apply(const complex<float>* in_block,
301 complex<float>* out_block) {
302 for (size_t i = 0; i < num_freqs_; ++i) {
303 float factor = sqrtf(fabsf(current_[i]));
304 if (!std::isnormal(factor)) {
305 factor = 1.0f;
306 }
307 out_block[i] = factor * in_block[i];
308 current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
309 }
310 }
311
312 } // namespace intelligibility 61 } // namespace intelligibility
313 62
314 } // namespace webrtc 63 } // namespace webrtc
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698