webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1672343002: Using the NS noise estimate for the IE

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1672343002: Using the NS noise estimate for the IE (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ns

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
53 float var_decay_rate;	53 float var_decay_rate;

54 size_t var_window_size;	54 size_t var_window_size;

55 int analysis_rate;	55 int analysis_rate;

56 float gain_change_limit;	56 float gain_change_limit;

57 float rho;	57 float rho;

58 };	58 };

59	59

60 explicit IntelligibilityEnhancer(const Config& config);	60 explicit IntelligibilityEnhancer(const Config& config);

61 IntelligibilityEnhancer(); // Initialize with default config.	61 IntelligibilityEnhancer(); // Initialize with default config.

62	62

63 // Reads and processes chunk of noise stream in time domain.	63 // Sets the capture noise estimate.
	hlundin-webrtc 2016/02/08 10:29:28: "Noise estimate" is a bit generic. It is the noise spectrum, right? aluebs-webrtc 2016/02/09 00:19:15 (in reply to hlundin-webrtc, 2016/02/08 10:29:28): Right. Improved the comment.
64 void AnalyzeCaptureAudio(float* const* audio,	64 void SetCaptureNoiseEstimate(const std::vector<float>& noise);

65 int sample_rate_hz,

66 size_t num_channels);

67	65

68 // Reads chunk of speech in time domain and updates with modified signal.	66 // Reads chunk of speech in time domain and updates with modified signal.

69 void ProcessRenderAudio(float* const* audio,	67 void ProcessRenderAudio(float* const* audio,

70 int sample_rate_hz,	68 int sample_rate_hz,

71 size_t num_channels);	69 size_t num_channels);

72 bool active() const;	70 bool active() const;

73	71

74 private:	72 private:

75 enum AudioSource {

76 kRenderStream = 0, // Clear speech stream.

77 kCaptureStream, // Noise stream.

78 };

79

80 // Provides access point to the frequency domain.	73 // Provides access point to the frequency domain.

81 class TransformCallback : public LappedTransform::Callback {	74 class TransformCallback : public LappedTransform::Callback {

82 public:	75 public:

83 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);	76 TransformCallback(IntelligibilityEnhancer* parent);

84	77

85 // All in frequency domain, receives input \|in_block\|, applies	78 // All in frequency domain, receives input \|in_block\|, applies

86 // intelligibility enhancement, and writes result to \|out_block\|.	79 // intelligibility enhancement, and writes result to \|out_block\|.

87 void ProcessAudioBlock(const std::complex<float>* const* in_block,	80 void ProcessAudioBlock(const std::complex<float>* const* in_block,

88 size_t in_channels,	81 size_t in_channels,

89 size_t frames,	82 size_t frames,

90 size_t out_channels,	83 size_t out_channels,

91 std::complex<float>* const* out_block) override;	84 std::complex<float>* const* out_block) override;

92	85

93 private:	86 private:

94 IntelligibilityEnhancer* parent_;	87 IntelligibilityEnhancer* parent_;

95 AudioSource source_;

96 };	88 };

97 friend class TransformCallback;	89 friend class TransformCallback;

98 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);	90 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

99 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);	91 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

100	92

101 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

102 void DispatchAudio(AudioSource source,

103 const std::complex<float>* in_block,

104 std::complex<float>* out_block);

105

106 // Updates variance computation and analysis with \|in_block_\|,	93 // Updates variance computation and analysis with \|in_block_\|,

107 // and writes modified speech to \|out_block\|.	94 // and writes modified speech to \|out_block\|.

108 void ProcessClearBlock(const std::complex<float>* in_block,	95 void ProcessClearBlock(const std::complex<float>* in_block,

109 std::complex<float>* out_block);	96 std::complex<float>* out_block);

110	97

111 // Computes and sets modified gains.	98 // Computes and sets modified gains.

112 void AnalyzeClearBlock(float power_target);	99 void AnalyzeClearBlock(float power_target);

113	100

114 // Bisection search for optimal \|lambda\|.	101 // Bisection search for optimal \|lambda\|.

115 void SolveForLambda(float power_target, float power_bot, float power_top);	102 void SolveForLambda(float power_target, float power_bot, float power_top);

116	103

117 // Transforms freq gains to ERB gains.	104 // Transforms freq gains to ERB gains.

118 void UpdateErbGains();	105 void UpdateErbGains();

119	106

120 // Updates variance calculation for noise input with \|in_block\|.

121 void ProcessNoiseBlock(const std::complex<float>* in_block,

122 std::complex<float>* out_block);

123

124 // Returns number of ERB filters.	107 // Returns number of ERB filters.

125 static size_t GetBankSize(int sample_rate, size_t erb_resolution);	108 static size_t GetBankSize(int sample_rate, size_t erb_resolution);

126	109

127 // Initializes ERB filterbank.	110 // Initializes ERB filterbank.

128 void CreateErbBank();	111 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

129	112

130 // Analytically solves quadratic for optimal gains given \|lambda\|.	113 // Analytically solves quadratic for optimal gains given \|lambda\|.

131 // Negative gains are set to 0. Stores the results in \|sols\|.	114 // Negative gains are set to 0. Stores the results in \|sols\|.

132 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);	115 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

133	116

134 // Computes variance across ERB filters from freq variance \|var\|.	117 // Computes variance across ERB filters from freq variance \|var\|.

135 // Stores in \|result\|.	118 // Stores in \|result\|.

136 void FilterVariance(const float* var, float* result);	119 void FilterVariance(const float* var,

	120 const std::vector<std::vector<float>>& filter_bank,

	121 float* result);

137	122

138 // Returns dot product of vectors specified by size \|length\| arrays \|a\|,\|b\|.	123 // Returns dot product of vectors specified by size \|length\| arrays \|a\|,\|b\|.

139 static float DotProduct(const float* a, const float* b, size_t length);	124 static float DotProduct(const float* a, const float* b, size_t length);

140	125

141 const size_t freqs_; // Num frequencies in frequency domain.	126 const size_t freqs_; // Num frequencies in frequency domain.

142 const size_t window_size_; // Window size in samples; also the block size.	127 const size_t window_size_; // Window size in samples; also the block size.

143 const size_t chunk_length_; // Chunk size in samples.	128 const size_t chunk_length_; // Chunk size in samples.

144 const size_t bank_size_; // Num ERB filters.	129 const size_t bank_size_; // Num ERB filters.

145 const int sample_rate_hz_;	130 const int sample_rate_hz_;

146 const int erb_resolution_;	131 const int erb_resolution_;

147 const size_t num_capture_channels_;	132 const size_t num_capture_channels_;

148 const size_t num_render_channels_;	133 const size_t num_render_channels_;

149 const int analysis_rate_; // Num blocks before gains recalculated.	134 const int analysis_rate_; // Num blocks before gains recalculated.

150	135

151 const bool active_; // Whether render gains are being updated.	136 const bool active_; // Whether render gains are being updated.

152 // TODO(ekm): Add logic for updating \|active_\|.	137 // TODO(ekm): Add logic for updating \|active_\|.

153	138

154 intelligibility::VarianceArray clear_variance_;	139 intelligibility::VarianceArray clear_variance_;

155 intelligibility::VarianceArray noise_variance_;	140 std::vector<float> noise_power_;

156 rtc::scoped_ptr<float[]> filtered_clear_var_;	141 rtc::scoped_ptr<float[]> filtered_clear_var_;

157 rtc::scoped_ptr<float[]> filtered_noise_var_;	142 rtc::scoped_ptr<float[]> filtered_noise_var_;

158 std::vector<std::vector<float>> filter_bank_;

159 rtc::scoped_ptr<float[]> center_freqs_;	143 rtc::scoped_ptr<float[]> center_freqs_;

	144 std::vector<std::vector<float>> capture_filter_bank_;

	145 std::vector<std::vector<float>> render_filter_bank_;

160 size_t start_freq_;	146 size_t start_freq_;

161 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.	147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

162 // for each ERB band.	148 // for each ERB band.

163 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.	149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

164 intelligibility::GainApplier gain_applier_;	150 intelligibility::GainApplier gain_applier_;

165	151

166 // Destination buffers used to reassemble blocked chunks before overwriting	152 // Destination buffers used to reassemble blocked chunks before overwriting

167 // the original input array with modifications.	153 // the original input array with modifications.

168 ChannelBuffer<float> temp_render_out_buffer_;	154 ChannelBuffer<float> temp_render_out_buffer_;

169 ChannelBuffer<float> temp_capture_out_buffer_;

170	155

171 rtc::scoped_ptr<float[]> kbd_window_;	156 rtc::scoped_ptr<float[]> kbd_window_;

172 TransformCallback render_callback_;	157 TransformCallback render_callback_;

173 TransformCallback capture_callback_;

174 rtc::scoped_ptr<LappedTransform> render_mangler_;	158 rtc::scoped_ptr<LappedTransform> render_mangler_;

175 rtc::scoped_ptr<LappedTransform> capture_mangler_;

176 int block_count_;	159 int block_count_;

177 int analysis_step_;	160 int analysis_step_;

178 };	161 };

179	162

180 } // namespace webrtc	163 } // namespace webrtc

181	164

182 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	165 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW