Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: webrtc/modules/audio_processing/vad/vad_audio_proc.cc

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master
Patch Set: Resync Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
69 VadAudioProc::~VadAudioProc() { 69 VadAudioProc::~VadAudioProc() {
70 } 70 }
71 71
72 void VadAudioProc::ResetBuffer() { 72 void VadAudioProc::ResetBuffer() {
73 memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess], 73 memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
74 sizeof(audio_buffer_[0]) * kNumPastSignalSamples); 74 sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
75 num_buffer_samples_ = kNumPastSignalSamples; 75 num_buffer_samples_ = kNumPastSignalSamples;
76 } 76 }
77 77
78 int VadAudioProc::ExtractFeatures(const int16_t* frame, 78 int VadAudioProc::ExtractFeatures(const int16_t* frame,
79 int length, 79 size_t length,
80 AudioFeatures* features) { 80 AudioFeatures* features) {
81 features->num_frames = 0; 81 features->num_frames = 0;
82 if (length != kNumSubframeSamples) { 82 if (length != kNumSubframeSamples) {
83 return -1; 83 return -1;
84 } 84 }
85 85
86 // High-pass filter to remove the DC component and very low frequency content. 86 // High-pass filter to remove the DC component and very low frequency content.
87 // We have observed that this high-pass filtering improves voiced/non-voiced 87 // We have observed that this high-pass filtering improves voiced/non-voiced
88 // classification. 88 // classification.
89 if (high_pass_filter_->Filter(frame, kNumSubframeSamples, 89 if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
90 &audio_buffer_[num_buffer_samples_]) != 0) { 90 &audio_buffer_[num_buffer_samples_]) != 0) {
91 return -1; 91 return -1;
92 } 92 }
93 93
94 num_buffer_samples_ += kNumSubframeSamples; 94 num_buffer_samples_ += kNumSubframeSamples;
95 if (num_buffer_samples_ < kBufferLength) { 95 if (num_buffer_samples_ < kBufferLength) {
96 return 0; 96 return 0;
97 } 97 }
98 assert(num_buffer_samples_ == kBufferLength); 98 assert(num_buffer_samples_ == kBufferLength);
99 features->num_frames = kNum10msSubframes; 99 features->num_frames = kNum10msSubframes;
100 features->silence = false; 100 features->silence = false;
101 101
102 Rms(features->rms, kMaxNumFrames); 102 Rms(features->rms, kMaxNumFrames);
103 for (int i = 0; i < kNum10msSubframes; ++i) { 103 for (size_t i = 0; i < kNum10msSubframes; ++i) {
104 if (features->rms[i] < kSilenceRms) { 104 if (features->rms[i] < kSilenceRms) {
105 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. 105 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
106 // Bail out here instead. 106 // Bail out here instead.
107 features->silence = true; 107 features->silence = true;
108 ResetBuffer(); 108 ResetBuffer();
109 return 0; 109 return 0;
110 } 110 }
111 } 111 }
112 112
113 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, 113 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
114 kMaxNumFrames); 114 kMaxNumFrames);
115 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); 115 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
116 ResetBuffer(); 116 ResetBuffer();
117 return 0; 117 return 0;
118 } 118 }
119 119
120 // Computes |kLpcOrder + 1| correlation coefficients. 120 // Computes |kLpcOrder + 1| correlation coefficients.
121 void VadAudioProc::SubframeCorrelation(double* corr, 121 void VadAudioProc::SubframeCorrelation(double* corr,
122 int length_corr, 122 size_t length_corr,
123 int subframe_index) { 123 size_t subframe_index) {
124 assert(length_corr >= kLpcOrder + 1); 124 assert(length_corr >= kLpcOrder + 1);
125 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; 125 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
126 int buffer_index = subframe_index * kNumSubframeSamples; 126 size_t buffer_index = subframe_index * kNumSubframeSamples;
127 127
128 for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) 128 for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
129 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; 129 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
130 130
131 WebRtcIsac_AutoCorr(corr, windowed_audio, 131 WebRtcIsac_AutoCorr(corr, windowed_audio,
132 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); 132 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
133 } 133 }
134 134
135 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. 135 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
136 // The analysis window is 15 ms long and it is centered on the first half of 136 // The analysis window is 15 ms long and it is centered on the first half of
137 // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the 137 // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
138 // first half of each 10 ms subframe. 138 // first half of each 10 ms subframe.
139 void VadAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) { 139 void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
140 assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1)); 140 assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
141 double corr[kLpcOrder + 1]; 141 double corr[kLpcOrder + 1];
142 double reflec_coeff[kLpcOrder]; 142 double reflec_coeff[kLpcOrder];
143 for (int i = 0, offset_lpc = 0; i < kNum10msSubframes; 143 for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
144 i++, offset_lpc += kLpcOrder + 1) { 144 i++, offset_lpc += kLpcOrder + 1) {
145 SubframeCorrelation(corr, kLpcOrder + 1, i); 145 SubframeCorrelation(corr, kLpcOrder + 1, i);
146 corr[0] *= 1.0001; 146 corr[0] *= 1.0001;
147 // This makes Lev-Durb a bit more stable. 147 // This makes Lev-Durb a bit more stable.
148 for (int k = 0; k < kLpcOrder + 1; k++) { 148 for (size_t k = 0; k < kLpcOrder + 1; k++) {
149 corr[k] *= kCorrWeight[k]; 149 corr[k] *= kCorrWeight[k];
150 } 150 }
151 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); 151 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
152 } 152 }
153 } 153 }
154 154
155 // Fit a second order curve to these 3 points and find the location of the 155 // Fit a second order curve to these 3 points and find the location of the
156 // extremum. The points are inverted before curve fitting. 156 // extremum. The points are inverted before curve fitting.
157 static float QuadraticInterpolation(float prev_val, 157 static float QuadraticInterpolation(float prev_val,
158 float curr_val, 158 float curr_val,
159 float next_val) { 159 float next_val) {
160 // Doing the interpolation in |1 / A(z)|^2. 160 // Doing the interpolation in |1 / A(z)|^2.
161 float fractional_index = 0; 161 float fractional_index = 0;
162 next_val = 1.0f / next_val; 162 next_val = 1.0f / next_val;
163 prev_val = 1.0f / prev_val; 163 prev_val = 1.0f / prev_val;
164 curr_val = 1.0f / curr_val; 164 curr_val = 1.0f / curr_val;
165 165
166 fractional_index = 166 fractional_index =
167 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); 167 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
168 assert(fabs(fractional_index) < 1); 168 assert(fabs(fractional_index) < 1);
169 return fractional_index; 169 return fractional_index;
170 } 170 }
171 171
172 // 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral envelope 172 // 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral envelope
173 // of the input signal. The local maximum of the spectral envelope corresponds 173 // of the input signal. The local maximum of the spectral envelope corresponds
174 // with the local minimum of A(z). It saves complexity, as we save one 174 // with the local minimum of A(z). It saves complexity, as we save one
175 // inversion. Furthermore, we find the first local maximum of magnitude squared, 175 // inversion. Furthermore, we find the first local maximum of magnitude squared,
176 // to save on one square root. 176 // to save on one square root.
177 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) { 177 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
178 size_t length_f_peak) {
178 assert(length_f_peak >= kNum10msSubframes); 179 assert(length_f_peak >= kNum10msSubframes);
179 double lpc[kNum10msSubframes * (kLpcOrder + 1)]; 180 double lpc[kNum10msSubframes * (kLpcOrder + 1)];
180 // For all sub-frames. 181 // For all sub-frames.
181 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); 182 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
182 183
183 const int kNumDftCoefficients = kDftSize / 2 + 1; 184 const size_t kNumDftCoefficients = kDftSize / 2 + 1;
184 float data[kDftSize]; 185 float data[kDftSize];
185 186
186 for (int i = 0; i < kNum10msSubframes; i++) { 187 for (size_t i = 0; i < kNum10msSubframes; i++) {
187 // Convert to float with zero pad. 188 // Convert to float with zero pad.
188 memset(data, 0, sizeof(data)); 189 memset(data, 0, sizeof(data));
189 for (int n = 0; n < kLpcOrder + 1; n++) { 190 for (size_t n = 0; n < kLpcOrder + 1; n++) {
190 data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]); 191 data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
191 } 192 }
192 // Transform to frequency domain. 193 // Transform to frequency domain.
193 WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); 194 WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
194 195
195 int index_peak = 0; 196 size_t index_peak = 0;
196 float prev_magn_sqr = data[0] * data[0]; 197 float prev_magn_sqr = data[0] * data[0];
197 float curr_magn_sqr = data[2] * data[2] + data[3] * data[3]; 198 float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
198 float next_magn_sqr; 199 float next_magn_sqr;
199 bool found_peak = false; 200 bool found_peak = false;
200 for (int n = 2; n < kNumDftCoefficients - 1; n++) { 201 for (size_t n = 2; n < kNumDftCoefficients - 1; n++) {
201 next_magn_sqr = 202 next_magn_sqr =
202 data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1]; 203 data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
203 if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { 204 if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
204 found_peak = true; 205 found_peak = true;
205 index_peak = n - 1; 206 index_peak = n - 1;
206 break; 207 break;
207 } 208 }
208 prev_magn_sqr = curr_magn_sqr; 209 prev_magn_sqr = curr_magn_sqr;
209 curr_magn_sqr = next_magn_sqr; 210 curr_magn_sqr = next_magn_sqr;
210 } 211 }
(...skipping 10 matching lines...) Expand all
221 fractional_index = 222 fractional_index =
222 QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr); 223 QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
223 } 224 }
224 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; 225 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
225 } 226 }
226 } 227 }
227 228
228 // Using iSAC functions to estimate pitch gains & lags. 229 // Using iSAC functions to estimate pitch gains & lags.
229 void VadAudioProc::PitchAnalysis(double* log_pitch_gains, 230 void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
230 double* pitch_lags_hz, 231 double* pitch_lags_hz,
231 int length) { 232 size_t length) {
232 // TODO(turajs): This can be "imported" from iSAC, as can the next two 233 // TODO(turajs): This can be "imported" from iSAC, as can the next two
233 // constants. 234 // constants.
234 assert(length >= kNum10msSubframes); 235 assert(length >= kNum10msSubframes);
235 const int kNumPitchSubframes = 4; 236 const int kNumPitchSubframes = 4;
236 double gains[kNumPitchSubframes]; 237 double gains[kNumPitchSubframes];
237 double lags[kNumPitchSubframes]; 238 double lags[kNumPitchSubframes];
238 239
239 const int kNumSubbandFrameSamples = 240; 240 const int kNumSubbandFrameSamples = 240;
240 const int kNumLookaheadSamples = 24; 241 const int kNumLookaheadSamples = 24;
241 242
(...skipping 11 matching lines...) Expand all
253 WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter, 254 WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
254 pitch_analysis_handle_.get(), lags, gains); 255 pitch_analysis_handle_.get(), lags, gains);
255 256
256 // Lags are computed on lower-band signal with sampling rate half of the 257 // Lags are computed on lower-band signal with sampling rate half of the
257 // input signal. 258 // input signal.
258 GetSubframesPitchParameters( 259 GetSubframesPitchParameters(
259 kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes, 260 kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
260 &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz); 261 &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
261 } 262 }
262 263
263 void VadAudioProc::Rms(double* rms, int length_rms) { 264 void VadAudioProc::Rms(double* rms, size_t length_rms) {
264 assert(length_rms >= kNum10msSubframes); 265 assert(length_rms >= kNum10msSubframes);
265 int offset = kNumPastSignalSamples; 266 size_t offset = kNumPastSignalSamples;
266 for (int i = 0; i < kNum10msSubframes; i++) { 267 for (size_t i = 0; i < kNum10msSubframes; i++) {
267 rms[i] = 0; 268 rms[i] = 0;
268 for (int n = 0; n < kNumSubframeSamples; n++, offset++) 269 for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
269 rms[i] += audio_buffer_[offset] * audio_buffer_[offset]; 270 rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
270 rms[i] = sqrt(rms[i] / kNumSubframeSamples); 271 rms[i] = sqrt(rms[i] / kNumSubframeSamples);
271 } 272 }
272 } 273 }
273 274
274 } // namespace webrtc 275 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/vad/vad_audio_proc.h ('k') | webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698