webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h - Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h

Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ie

Patch Set: Address turajs comments Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Specifies helper classes for intelligibility enhancement.

13 //

14

15 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

16 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

17	13

18 #include <complex>	14 #include <complex>

19	15

20 #include "webrtc/base/scoped_ptr.h"	16 #include "webrtc/base/scoped_ptr.h"

21	17

22 namespace webrtc {	18 namespace webrtc {

23	19

24 namespace intelligibility {	20 // Internal helper for computing the power of a stream of arrays.

	21 // The result is an array of power per position: the i-th power is the power of

	22 // the stream of data on the i-th positions in the input arrays.

	23 class PowerEstimator {

	24 public:

	25 // Construct an instance for the given input array length (|freqs|), with the

	26 // appropriate parameters. |decay| is the forgetting factor.

	27 PowerEstimator(size_t freqs, float decay);

25	28

26 // Return |current| changed towards |target|, with the change being at most	29 // Add a new data point to the series.

27 // |limit|.	30 void Step(const std::complex<float>* data);

28 float UpdateFactor(float target, float current, float limit);

29	31

30 // Apply a small fudge to degenerate complex values. The numbers in the array	32 // The current power array.

31 // were chosen randomly, so that even a series of all zeroes has some small	33 const float* Power();

32 // variability.

33 std::complex<float> zerofudge(std::complex<float> c);

34

35 // Incremental mean computation. Return the mean of the series with the

36 // mean |mean| with added |data|.

37 std::complex<float> NewMean(std::complex<float> mean,

38 std::complex<float> data,

39 size_t count);

40

41 // Updates |mean| with added |data|;

42 void AddToMean(std::complex<float> data,

43 size_t count,

44 std::complex<float>* mean);

45

46 // Internal helper for computing the variances of a stream of arrays.

47 // The result is an array of variances per position: the i-th variance

48 // is the variance of the stream of data on the i-th positions in the

49 // input arrays.

50 // There are four methods of computation:

51 // * kStepInfinite computes variances from the beginning onwards

52 // * kStepDecaying uses a recursive exponential decay formula with a

53 // settable forgetting factor

54 // * kStepWindowed computes variances within a moving window

55 // * kStepBlocked is similar to kStepWindowed, but history is kept

56 // as a rolling window of blocks: multiple input elements are used for

57 // one block and the history then consists of the variances of these blocks

58 // with the same effect as kStepWindowed, but less storage, so the window

59 // can be longer

60 class VarianceArray {

61 public:

62 enum StepType {

63 kStepInfinite = 0,

64 kStepDecaying,

65 kStepWindowed,

66 kStepBlocked,

67 kStepBlockBasedMovingAverage

68 };

69

70 // Construct an instance for the given input array length (|freqs|) and

71 // computation algorithm (|type|), with the appropriate parameters.

72 // |window_size| is the number of samples for kStepWindowed and

73 // the number of blocks for kStepBlocked. |decay| is the forgetting factor

74 // for kStepDecaying.

75 VarianceArray(size_t freqs, StepType type, size_t window_size, float decay);

76

77 // Add a new data point to the series and compute the new variances.

78 // TODO(bercic) |skip_fudge| is a flag for kStepWindowed and kStepDecaying,

79 // whether they should skip adding some small dummy values to the input

80 // to prevent problems with all-zero inputs. Can probably be removed.

81 void Step(const std::complex<float>* data, bool skip_fudge = false) {

82 (this->*step_func_)(data, skip_fudge);

83 }

84 // Reset variances to zero and forget all history.

85 void Clear();

86 // Scale the input data by |scale|. Effectively multiply variances

87 // by |scale^2|.

88 void ApplyScale(float scale);

89

90 // The current set of variances.

91 const float* variance() const { return variance_.get(); }

92

93 // The mean value of the current set of variances.

94 float array_mean() const { return array_mean_; }

95	34

96 private:	35 private:

97 void InfiniteStep(const std::complex<float>* data, bool dummy);

98 void DecayStep(const std::complex<float>* data, bool dummy);

99 void WindowedStep(const std::complex<float>* data, bool dummy);

100 void BlockedStep(const std::complex<float>* data, bool dummy);

101 void BlockBasedMovingAverage(const std::complex<float>* data, bool dummy);

102

103 // TODO(ekmeyerson): Switch the following running means	36 // TODO(ekmeyerson): Switch the following running means

104 // and histories from rtc::scoped_ptr to std::vector.	37 // and histories from rtc::scoped_ptr to std::vector.

105

106 // The current average X and X^2.

107 rtc::scoped_ptr<std::complex<float>[]> running_mean_;

108 rtc::scoped_ptr<std::complex<float>[]> running_mean_sq_;	38 rtc::scoped_ptr<std::complex<float>[]> running_mean_sq_;

109	39

110 // Average X and X^2 for the current block in kStepBlocked.	40 // The current magnitude array.

111 rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_;	41 rtc::scoped_ptr<float[]> magnitude_;

112 rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_sq_;	42 // The current power array.

113	43 rtc::scoped_ptr<float[]> power_;

114 // Sample history for the rolling window in kStepWindowed and block-wise

115 // histories for kStepBlocked.

116 rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> history_;

117 rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_;

118 rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_sq_;

119

120 // The current set of variances and sums for Welford's algorithm.

121 rtc::scoped_ptr<float[]> variance_;

122 rtc::scoped_ptr<float[]> conj_sum_;

123	44

124 const size_t num_freqs_;	45 const size_t num_freqs_;

125 const size_t window_size_;

126 const float decay_;	46 const float decay_;

127 size_t history_cursor_;

128 size_t count_;

129 float array_mean_;

130 bool buffer_full_;

131 void (VarianceArray::*step_func_)(const std::complex<float>*, bool);

132 };	47 };

133	48

134 // Helper class for smoothing gain changes. On each applicatiion step, the	49 // Helper class for smoothing gain changes. On each application step, the

135 // currently used gains are changed towards a set of settable target gains,	50 // currently used gains are changed towards a set of settable target gains,

136 // constrained by a limit on the magnitude of the changes.	51 // constrained by a limit on the magnitude of the changes.

137 class GainApplier {	52 class GainApplier {

138 public:	53 public:

139 GainApplier(size_t freqs, float change_limit);	54 GainApplier(size_t freqs, float change_limit);

140	55

141 // Copy |in_block| to |out_block|, multiplied by the current set of gains,	56 // Copy |in_block| to |out_block|, multiplied by the current set of gains,

142 // and step the current set of gains towards the target set.	57 // and step the current set of gains towards the target set.

143 void Apply(const std::complex<float>* in_block,	58 void Apply(const std::complex<float>* in_block,

144 std::complex<float>* out_block);	59 std::complex<float>* out_block);

145	60

146 // Return the current target gain set. Modify this array to set the targets.	61 // Return the current target gain set. Modify this array to set the targets.

147 float* target() const { return target_.get(); }	62 float* target() const { return target_.get(); }

148	63

149 private:	64 private:

150 const size_t num_freqs_;	65 const size_t num_freqs_;

151 const float change_limit_;	66 const float change_limit_;

152 rtc::scoped_ptr<float[]> target_;	67 rtc::scoped_ptr<float[]> target_;

153 rtc::scoped_ptr<float[]> current_;	68 rtc::scoped_ptr<float[]> current_;

154 };	69 };

155	70

156 } // namespace intelligibility

157

158 } // namespace webrtc	71 } // namespace webrtc

159	72

160 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_	73 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

OLD	NEW