webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1227213002: Update audio code to use size_t more correctly, webrtc/modules/audio_processing/

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1227213002: Update audio code to use size_t more correctly, webrtc/modules/audio_processing/ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc ('K') | « webrtc/modules/audio_processing/include/mock_audio_processing.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 24 matching lines...) Expand all Loading...
35 // Construct a new instance with the given filter bank resolution,	35 // Construct a new instance with the given filter bank resolution,

36 // sampling rate, number of channels and analysis rates.	36 // sampling rate, number of channels and analysis rates.

37 // \|analysis_rate\| sets the number of input blocks (containing speech!)	37 // \|analysis_rate\| sets the number of input blocks (containing speech!)

38 // to elapse before a new gain computation is made. \|variance_rate\| specifies	38 // to elapse before a new gain computation is made. \|variance_rate\| specifies

39 // the number of gain recomputations after which the variances are reset.	39 // the number of gain recomputations after which the variances are reset.

40 // \|cv_*\| are parameters for the VarianceArray constructor for the	40 // \|cv_*\| are parameters for the VarianceArray constructor for the

41 // clear speech stream.	41 // clear speech stream.

42 // TODO(bercic): the \|cv_\|, \|_rate\| and \|gain_limit\| parameters should	42 // TODO(bercic): the \|cv_\|, \|_rate\| and \|gain_limit\| parameters should

43 // probably go away once fine tuning is done. They override the internal	43 // probably go away once fine tuning is done. They override the internal

44 // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).	44 // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).

45 IntelligibilityEnhancer(int erb_resolution,	45 IntelligibilityEnhancer(size_t erb_resolution,

46 int sample_rate_hz,	46 int sample_rate_hz,

47 int channels,	47 int channels,

48 int cv_type,	48 int cv_type,

49 float cv_alpha,	49 float cv_alpha,

50 int cv_win,	50 size_t cv_win,

51 int analysis_rate,	51 int analysis_rate,

52 int variance_rate,	52 int variance_rate,

53 float gain_limit);	53 float gain_limit);

54 ~IntelligibilityEnhancer();	54 ~IntelligibilityEnhancer();

55	55

56 // Reads and processes chunk of noise stream in time domain.	56 // Reads and processes chunk of noise stream in time domain.

57 void ProcessCaptureAudio(float* const* audio);	57 void ProcessCaptureAudio(float* const* audio);

58	58

59 // Reads chunk of speech in time domain and updates with modified signal.	59 // Reads chunk of speech in time domain and updates with modified signal.

60 void ProcessRenderAudio(float* const* audio);	60 void ProcessRenderAudio(float* const* audio);

61	61

62 private:	62 private:

63 enum AudioSource {	63 enum AudioSource {

64 kRenderStream = 0, // Clear speech stream.	64 kRenderStream = 0, // Clear speech stream.

65 kCaptureStream, // Noise stream.	65 kCaptureStream, // Noise stream.

66 };	66 };

67	67

68 // Provides access point to the frequency domain.	68 // Provides access point to the frequency domain.

69 class TransformCallback : public LappedTransform::Callback {	69 class TransformCallback : public LappedTransform::Callback {

70 public:	70 public:

71 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);	71 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);

72	72

73 // All in frequency domain, receives input \|in_block\|, applies	73 // All in frequency domain, receives input \|in_block\|, applies

74 // intelligibility enhancement, and writes result to \|out_block\|.	74 // intelligibility enhancement, and writes result to \|out_block\|.

75 virtual void ProcessAudioBlock(const std::complex<float>* const* in_block,	75 void ProcessAudioBlock(const std::complex<float>* const* in_block,

76 int in_channels,	76 int in_channels,

77 int frames,	77 size_t frames,

78 int out_channels,	78 int out_channels,

79 std::complex<float>* const* out_block);	79 std::complex<float>* const* out_block) override;

80	80

81 private:	81 private:

82 IntelligibilityEnhancer* parent_;	82 IntelligibilityEnhancer* parent_;

83 AudioSource source_;	83 AudioSource source_;

84 };	84 };

85 friend class TransformCallback;	85 friend class TransformCallback;

86	86

87 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.	87 // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.

88 void DispatchAudio(AudioSource source,	88 void DispatchAudio(AudioSource source,

89 const std::complex<float>* in_block,	89 const std::complex<float>* in_block,

90 std::complex<float>* out_block);	90 std::complex<float>* out_block);

91	91

92 // Updates variance computation and analysis with \|in_block_\|,	92 // Updates variance computation and analysis with \|in_block_\|,

93 // and writes modified speech to \|out_block\|.	93 // and writes modified speech to \|out_block\|.

94 void ProcessClearBlock(const std::complex<float>* in_block,	94 void ProcessClearBlock(const std::complex<float>* in_block,

95 std::complex<float>* out_block);	95 std::complex<float>* out_block);

96	96

97 // Computes and sets modified gains.	97 // Computes and sets modified gains.

98 void AnalyzeClearBlock(float power_target);	98 void AnalyzeClearBlock(float power_target);

99	99

100 // Updates variance calculation for noise input with \|in_block\|.	100 // Updates variance calculation for noise input with \|in_block\|.

101 void ProcessNoiseBlock(const std::complex<float>* in_block,	101 void ProcessNoiseBlock(const std::complex<float>* in_block,

102 std::complex<float>* out_block);	102 std::complex<float>* out_block);

103	103

104 // Returns number of ERB filters.	104 // Returns number of ERB filters.

105 static int GetBankSize(int sample_rate, int erb_resolution);	105 static size_t GetBankSize(int sample_rate, size_t erb_resolution);

106	106

107 // Initializes ERB filterbank.	107 // Initializes ERB filterbank.

108 void CreateErbBank();	108 void CreateErbBank();

109	109

110 // Analytically solves quadratic for optimal gains given \|lambda\|.	110 // Analytically solves quadratic for optimal gains given \|lambda\|.

111 // Negative gains are set to 0. Stores the results in \|sols\|.	111 // Negative gains are set to 0. Stores the results in \|sols\|.

112 void SolveForGainsGivenLambda(float lambda, int start_freq, float* sols);	112 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

113	113

114 // Computes variance across ERB filters from freq variance \|var\|.	114 // Computes variance across ERB filters from freq variance \|var\|.

115 // Stores in \|result\|.	115 // Stores in \|result\|.

116 void FilterVariance(const float* var, float* result);	116 void FilterVariance(const float* var, float* result);

117	117

118 // Returns dot product of vectors specified by size \|length\| arrays \|a\|,\|b\|.	118 // Returns dot product of vectors specified by size \|length\| arrays \|a\|,\|b\|.

119 static float DotProduct(const float* a, const float* b, int length);	119 static float DotProduct(const float* a, const float* b, size_t length);

120	120

121 static const int kErbResolution;

122 static const int kWindowSizeMs;	121 static const int kWindowSizeMs;

123 static const int kChunkSizeMs;	122 static const int kChunkSizeMs;

124 static const int kAnalyzeRate; // Default for \|analysis_rate_\|.	123 static const int kAnalyzeRate; // Default for \|analysis_rate_\|.

125 static const int kVarianceRate; // Default for \|variance_rate_\|.	124 static const int kVarianceRate; // Default for \|variance_rate_\|.

126 static const float kClipFreq;	125 static const float kClipFreq;

127 static const float kConfigRho; // Default production and interpretation SNR.	126 static const float kConfigRho; // Default production and interpretation SNR.

128 static const float kKbdAlpha;	127 static const float kKbdAlpha;

129 static const float kGainChangeLimit;	128 static const float kGainChangeLimit;

130	129

131 const int freqs_; // Num frequencies in frequency domain.	130 const size_t freqs_; // Num frequencies in frequency domain.

132 const int window_size_; // Window size in samples; also the block size.	131 const size_t window_size_; // Window size in samples; also the block size.

133 const int chunk_length_; // Chunk size in samples.	132 const size_t chunk_length_; // Chunk size in samples.

134 const int bank_size_; // Num ERB filters.	133 const size_t bank_size_; // Num ERB filters.

135 const int sample_rate_hz_;	134 const int sample_rate_hz_;

136 const int erb_resolution_;	135 const int erb_resolution_;

137 const int channels_; // Num channels.	136 const int channels_; // Num channels.

138 const int analysis_rate_; // Num blocks before gains recalculated.	137 const int analysis_rate_; // Num blocks before gains recalculated.

139 const int variance_rate_; // Num recalculations before history is cleared.	138 const int variance_rate_; // Num recalculations before history is cleared.

140	139

141 intelligibility::VarianceArray clear_variance_;	140 intelligibility::VarianceArray clear_variance_;

142 intelligibility::VarianceArray noise_variance_;	141 intelligibility::VarianceArray noise_variance_;

143 rtc::scoped_ptr<float[]> filtered_clear_var_;	142 rtc::scoped_ptr<float[]> filtered_clear_var_;

144 rtc::scoped_ptr<float[]> filtered_noise_var_;	143 rtc::scoped_ptr<float[]> filtered_noise_var_;

145 float** filter_bank_; // TODO(ekmeyerson): Switch to using ChannelBuffer.	144 float** filter_bank_; // TODO(ekmeyerson): Switch to using ChannelBuffer.

146 rtc::scoped_ptr<float[]> center_freqs_;	145 rtc::scoped_ptr<float[]> center_freqs_;

147 int start_freq_;	146 size_t start_freq_;

148 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.	147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

149 // for each ERB band.	148 // for each ERB band.

150 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.	149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

151 intelligibility::GainApplier gain_applier_;	150 intelligibility::GainApplier gain_applier_;

152	151

153 // Destination buffer used to reassemble blocked chunks before overwriting	152 // Destination buffer used to reassemble blocked chunks before overwriting

154 // the original input array with modifications.	153 // the original input array with modifications.

155 // TODO(ekmeyerson): Switch to using ChannelBuffer.	154 // TODO(ekmeyerson): Switch to using ChannelBuffer.

156 float** temp_out_buffer_;	155 float** temp_out_buffer_;

157	156

(...skipping 11 matching lines...) Expand all Loading...
169 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.	168 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.

170 VadInst* vad_high_;	169 VadInst* vad_high_;

171 VadInst* vad_low_;	170 VadInst* vad_low_;

172 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;	171 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;

173 bool has_voice_low_; // Whether voice detected in speech stream.	172 bool has_voice_low_; // Whether voice detected in speech stream.

174 };	173 };

175	174

176 } // namespace webrtc	175 } // namespace webrtc

177	176

178 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW