webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1207353002: Add new variance update option and unittests for intelligibility

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1207353002: Add new variance update option and unittests for intelligibility (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Merge Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_tests.gypi ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //	11 //

12 // Specifies core class for intelligibility enhancement.	12 // Specifies core class for intelligibility enhancement.

13 //	13 //

14	14

15 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	15 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

16 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	16 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

17	17

18 #include <complex>	18 #include <complex>

	19 #include <vector>

19	20

20 #include "webrtc/base/scoped_ptr.h"	21 #include "webrtc/base/scoped_ptr.h"

21 #include "webrtc/common_audio/lapped_transform.h"	22 #include "webrtc/common_audio/lapped_transform.h"

22 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	23 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

23	24

24 struct WebRtcVadInst;	25 struct WebRtcVadInst;

25 typedef struct WebRtcVadInst VadInst;	26 typedef struct WebRtcVadInst VadInst;

26	27

27 namespace webrtc {	28 namespace webrtc {

28	29

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
76 int in_channels,	77 int in_channels,

77 int frames,	78 int frames,

78 int out_channels,	79 int out_channels,

79 std::complex<float>* const* out_block);	80 std::complex<float>* const* out_block);

80	81

81 private:	82 private:

82 IntelligibilityEnhancer* parent_;	83 IntelligibilityEnhancer* parent_;

83 AudioSource source_;	84 AudioSource source_;

84 };	85 };

85 friend class TransformCallback;	86 friend class TransformCallback;

	87 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

	88 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

86	89

87 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.	90 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

88 void DispatchAudio(AudioSource source,	91 void DispatchAudio(AudioSource source,

89 const std::complex<float>* in_block,	92 const std::complex<float>* in_block,

90 std::complex<float>* out_block);	93 std::complex<float>* out_block);

91	94

92 // Updates variance computation and analysis with |in_block|,	95 // Updates variance computation and analysis with |in_block|,

93 // and writes modified speech to |out_block|.	96 // and writes modified speech to |out_block|.

94 void ProcessClearBlock(const std::complex<float>* in_block,	97 void ProcessClearBlock(const std::complex<float>* in_block,

95 std::complex<float>* out_block);	98 std::complex<float>* out_block);

96	99

97 // Computes and sets modified gains.	100 // Computes and sets modified gains.

98 void AnalyzeClearBlock(float power_target);	101 void AnalyzeClearBlock(float power_target);

99	102

	103 // Bisection search for optimal |lambda|.

	104 void SolveForLambda(float power_target, float power_bot, float power_top);

	105

	106 // Transforms freq gains to ERB gains.

	107 void UpdateErbGains();

	108

100 // Updates variance calculation for noise input with |in_block|.	109 // Updates variance calculation for noise input with |in_block|.

101 void ProcessNoiseBlock(const std::complex<float>* in_block,	110 void ProcessNoiseBlock(const std::complex<float>* in_block,

102 std::complex<float>* out_block);	111 std::complex<float>* out_block);

103	112

104 // Returns number of ERB filters.	113 // Returns number of ERB filters.

105 static int GetBankSize(int sample_rate, int erb_resolution);	114 static int GetBankSize(int sample_rate, int erb_resolution);

106	115

107 // Initializes ERB filterbank.	116 // Initializes ERB filterbank.

108 void CreateErbBank();	117 void CreateErbBank();

109	118

110 // Analytically solves quadratic for optimal gains given |lambda|.	119 // Analytically solves quadratic for optimal gains given |lambda|.

111 // Negative gains are set to 0. Stores the results in |sols|.	120 // Negative gains are set to 0. Stores the results in |sols|.

112 void SolveForGainsGivenLambda(float lambda, int start_freq, float* sols);	121 void SolveForGainsGivenLambda(float lambda, int start_freq, float* sols);

113	122

114 // Computes variance across ERB filters from freq variance |var|.	123 // Computes variance across ERB filters from freq variance |var|.

115 // Stores in |result|.	124 // Stores in |result|.

116 void FilterVariance(const float* var, float* result);	125 void FilterVariance(const float* var, float* result);

117	126

118 // Returns dot product of vectors specified by size |length| arrays |a|,|b|.	127 // Returns dot product of vectors specified by size |length| arrays |a|,|b|.

119 static float DotProduct(const float* a, const float* b, int length);	128 static float DotProduct(const float* a, const float* b, int length);

120	129

121 static const int kErbResolution;

122 static const int kWindowSizeMs;

123 static const int kChunkSizeMs;

124 static const int kAnalyzeRate; // Default for |analysis_rate_|.

125 static const int kVarianceRate; // Default for |variance_rate_|.

126 static const float kClipFreq;

127 static const float kConfigRho; // Default production and interpretation SNR.

128 static const float kKbdAlpha;

129 static const float kGainChangeLimit;

130

131 const int freqs_; // Num frequencies in frequency domain.	130 const int freqs_; // Num frequencies in frequency domain.

132 const int window_size_; // Window size in samples; also the block size.	131 const int window_size_; // Window size in samples; also the block size.

133 const int chunk_length_; // Chunk size in samples.	132 const int chunk_length_; // Chunk size in samples.

134 const int bank_size_; // Num ERB filters.	133 const int bank_size_; // Num ERB filters.

135 const int sample_rate_hz_;	134 const int sample_rate_hz_;

136 const int erb_resolution_;	135 const int erb_resolution_;

137 const int channels_; // Num channels.	136 const int channels_; // Num channels.

138 const int analysis_rate_; // Num blocks before gains recalculated.	137 const int analysis_rate_; // Num blocks before gains recalculated.

139 const int variance_rate_; // Num recalculations before history is cleared.	138 const int variance_rate_; // Num recalculations before history is cleared.

140	139

141 intelligibility::VarianceArray clear_variance_;	140 intelligibility::VarianceArray clear_variance_;

142 intelligibility::VarianceArray noise_variance_;	141 intelligibility::VarianceArray noise_variance_;

143 rtc::scoped_ptr<float[]> filtered_clear_var_;	142 rtc::scoped_ptr<float[]> filtered_clear_var_;

144 rtc::scoped_ptr<float[]> filtered_noise_var_;	143 rtc::scoped_ptr<float[]> filtered_noise_var_;

145 float** filter_bank_; // TODO(ekmeyerson): Switch to using ChannelBuffer.	144 std::vector<std::vector<float>> filter_bank_;

146 rtc::scoped_ptr<float[]> center_freqs_;	145 rtc::scoped_ptr<float[]> center_freqs_;

147 int start_freq_;	146 int start_freq_;

148 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.	147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

149 // for each ERB band.	148 // for each ERB band.

150 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.	149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

151 intelligibility::GainApplier gain_applier_;	150 intelligibility::GainApplier gain_applier_;

152	151

153 // Destination buffer used to reassemble blocked chunks before overwriting	152 // Destination buffer used to reassemble blocked chunks before overwriting

154 // the original input array with modifications.	153 // the original input array with modifications.

155 // TODO(ekmeyerson): Switch to using ChannelBuffer.	154 // TODO(ekmeyerson): Switch to using ChannelBuffer.

(...skipping 13 matching lines...) Expand all Loading...
169 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.	168 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.

170 VadInst* vad_high_;	169 VadInst* vad_high_;

171 VadInst* vad_low_;	170 VadInst* vad_low_;

172 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;	171 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;

173 bool has_voice_low_; // Whether voice detected in speech stream.	172 bool has_voice_low_; // Whether voice detected in speech stream.

174 };	173 };

175	174

176 } // namespace webrtc	175 } // namespace webrtc

177	176

178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW