webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1227213002: Update audio code to use size_t more correctly, webrtc/modules/audio_processing/

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1227213002: Update audio code to use size_t more correctly, webrtc/modules/audio_processing/ (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Resync Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/include/mock_audio_processing.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 25 matching lines...) Expand all Loading...
36 // Construct a new instance with the given filter bank resolution,	36 // Construct a new instance with the given filter bank resolution,

37 // sampling rate, number of channels and analysis rates.	37 // sampling rate, number of channels and analysis rates.

38 // |analysis_rate| sets the number of input blocks (containing speech!)	38 // |analysis_rate| sets the number of input blocks (containing speech!)

39 // to elapse before a new gain computation is made. |variance_rate| specifies	39 // to elapse before a new gain computation is made. |variance_rate| specifies

40 // the number of gain recomputations after which the variances are reset.	40 // the number of gain recomputations after which the variances are reset.

41 // |cv_*| are parameters for the VarianceArray constructor for the	41 // |cv_*| are parameters for the VarianceArray constructor for the

42 // clear speech stream.	42 // clear speech stream.

43 // TODO(bercic): the |cv_|, |_rate| and |gain_limit| parameters should	43 // TODO(bercic): the |cv_|, |_rate| and |gain_limit| parameters should

44 // probably go away once fine tuning is done. They override the internal	44 // probably go away once fine tuning is done. They override the internal

45 // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).	45 // constants in the class (kGainChangeLimit, kAnalyzeRate, kVarianceRate).

46 IntelligibilityEnhancer(int erb_resolution,	46 IntelligibilityEnhancer(size_t erb_resolution,

47 int sample_rate_hz,	47 int sample_rate_hz,

48 int channels,	48 int channels,

49 int cv_type,	49 int cv_type,

50 float cv_alpha,	50 float cv_alpha,

51 int cv_win,	51 size_t cv_win,

52 int analysis_rate,	52 int analysis_rate,

53 int variance_rate,	53 int variance_rate,

54 float gain_limit);	54 float gain_limit);

55 ~IntelligibilityEnhancer();	55 ~IntelligibilityEnhancer();

56	56

57 // Reads and processes chunk of noise stream in time domain.	57 // Reads and processes chunk of noise stream in time domain.

58 void ProcessCaptureAudio(float* const* audio);	58 void ProcessCaptureAudio(float* const* audio);

59	59

60 // Reads chunk of speech in time domain and updates with modified signal.	60 // Reads chunk of speech in time domain and updates with modified signal.

61 void ProcessRenderAudio(float* const* audio);	61 void ProcessRenderAudio(float* const* audio);

62	62

63 private:	63 private:

64 enum AudioSource {	64 enum AudioSource {

65 kRenderStream = 0, // Clear speech stream.	65 kRenderStream = 0, // Clear speech stream.

66 kCaptureStream, // Noise stream.	66 kCaptureStream, // Noise stream.

67 };	67 };

68	68

69 // Provides access point to the frequency domain.	69 // Provides access point to the frequency domain.

70 class TransformCallback : public LappedTransform::Callback {	70 class TransformCallback : public LappedTransform::Callback {

71 public:	71 public:

72 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);	72 TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);

73	73

74 // All in frequency domain, receives input |in_block|, applies	74 // All in frequency domain, receives input |in_block|, applies

75 // intelligibility enhancement, and writes result to |out_block|.	75 // intelligibility enhancement, and writes result to |out_block|.

76 void ProcessAudioBlock(const std::complex<float>* const* in_block,	76 void ProcessAudioBlock(const std::complex<float>* const* in_block,

77 int in_channels,	77 int in_channels,

78 int frames,	78 size_t frames,

79 int out_channels,	79 int out_channels,

80 std::complex<float>* const* out_block) override;	80 std::complex<float>* const* out_block) override;

81	81

82 private:	82 private:

83 IntelligibilityEnhancer* parent_;	83 IntelligibilityEnhancer* parent_;

84 AudioSource source_;	84 AudioSource source_;

85 };	85 };

86 friend class TransformCallback;	86 friend class TransformCallback;

87 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);	87 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

88 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);	88 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

(...skipping 15 matching lines...) Expand all Loading...
104 void SolveForLambda(float power_target, float power_bot, float power_top);	104 void SolveForLambda(float power_target, float power_bot, float power_top);

105	105

106 // Transforms freq gains to ERB gains.	106 // Transforms freq gains to ERB gains.

107 void UpdateErbGains();	107 void UpdateErbGains();

108	108

109 // Updates variance calculation for noise input with |in_block|.	109 // Updates variance calculation for noise input with |in_block|.

110 void ProcessNoiseBlock(const std::complex<float>* in_block,	110 void ProcessNoiseBlock(const std::complex<float>* in_block,

111 std::complex<float>* out_block);	111 std::complex<float>* out_block);

112	112

113 // Returns number of ERB filters.	113 // Returns number of ERB filters.

114 static int GetBankSize(int sample_rate, int erb_resolution);	114 static size_t GetBankSize(int sample_rate, size_t erb_resolution);

115	115

116 // Initializes ERB filterbank.	116 // Initializes ERB filterbank.

117 void CreateErbBank();	117 void CreateErbBank();

118	118

119 // Analytically solves quadratic for optimal gains given |lambda|.	119 // Analytically solves quadratic for optimal gains given |lambda|.

120 // Negative gains are set to 0. Stores the results in |sols|.	120 // Negative gains are set to 0. Stores the results in |sols|.

121 void SolveForGainsGivenLambda(float lambda, int start_freq, float* sols);	121 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

122	122

123 // Computes variance across ERB filters from freq variance |var|.	123 // Computes variance across ERB filters from freq variance |var|.

124 // Stores in |result|.	124 // Stores in |result|.

125 void FilterVariance(const float* var, float* result);	125 void FilterVariance(const float* var, float* result);

126	126

127 // Returns dot product of vectors specified by size |length| arrays |a|,|b|.	127 // Returns dot product of vectors specified by size |length| arrays |a|,|b|.

128 static float DotProduct(const float* a, const float* b, int length);	128 static float DotProduct(const float* a, const float* b, size_t length);

129	129

130 const int freqs_; // Num frequencies in frequency domain.	130 const size_t freqs_; // Num frequencies in frequency domain.

131 const int window_size_; // Window size in samples; also the block size.	131 const size_t window_size_; // Window size in samples; also the block size.

132 const int chunk_length_; // Chunk size in samples.	132 const size_t chunk_length_; // Chunk size in samples.

133 const int bank_size_; // Num ERB filters.	133 const size_t bank_size_; // Num ERB filters.

134 const int sample_rate_hz_;	134 const int sample_rate_hz_;

135 const int erb_resolution_;	135 const int erb_resolution_;

136 const int channels_; // Num channels.	136 const int channels_; // Num channels.

137 const int analysis_rate_; // Num blocks before gains recalculated.	137 const int analysis_rate_; // Num blocks before gains recalculated.

138 const int variance_rate_; // Num recalculations before history is cleared.	138 const int variance_rate_; // Num recalculations before history is cleared.

139	139

140 intelligibility::VarianceArray clear_variance_;	140 intelligibility::VarianceArray clear_variance_;

141 intelligibility::VarianceArray noise_variance_;	141 intelligibility::VarianceArray noise_variance_;

142 rtc::scoped_ptr<float[]> filtered_clear_var_;	142 rtc::scoped_ptr<float[]> filtered_clear_var_;

143 rtc::scoped_ptr<float[]> filtered_noise_var_;	143 rtc::scoped_ptr<float[]> filtered_noise_var_;

144 std::vector<std::vector<float>> filter_bank_;	144 std::vector<std::vector<float>> filter_bank_;

145 rtc::scoped_ptr<float[]> center_freqs_;	145 rtc::scoped_ptr<float[]> center_freqs_;

146 int start_freq_;	146 size_t start_freq_;

147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.	147 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

148 // for each ERB band.	148 // for each ERB band.

149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.	149 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

150 intelligibility::GainApplier gain_applier_;	150 intelligibility::GainApplier gain_applier_;

151	151

152 // Destination buffer used to reassemble blocked chunks before overwriting	152 // Destination buffer used to reassemble blocked chunks before overwriting

153 // the original input array with modifications.	153 // the original input array with modifications.

154 // TODO(ekmeyerson): Switch to using ChannelBuffer.	154 // TODO(ekmeyerson): Switch to using ChannelBuffer.

155 float** temp_out_buffer_;	155 float** temp_out_buffer_;

156	156

(...skipping 11 matching lines...) Expand all Loading...
168 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.	168 // Note: VAD currently does not affect anything in IntelligibilityEnhancer.

169 VadInst* vad_high_;	169 VadInst* vad_high_;

170 VadInst* vad_low_;	170 VadInst* vad_low_;

171 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;	171 rtc::scoped_ptr<int16_t[]> vad_tmp_buffer_;

172 bool has_voice_low_; // Whether voice detected in speech stream.	172 bool has_voice_low_; // Whether voice detected in speech stream.

173 };	173 };

174	174

175 } // namespace webrtc	175 } // namespace webrtc

176	176

177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	177 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW