Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: webrtc/modules/audio_processing/vad/vad_audio_proc.cc

Issue 2320053003: webrtc/modules/audio_processing: Use RTC_DCHECK() instead of assert() (Closed)
Patch Set: rebase Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" 11 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
12 12
13 #include <math.h> 13 #include <math.h>
14 #include <stdio.h> 14 #include <stdio.h>
15 15
16 #include "webrtc/base/checks.h"
16 #include "webrtc/common_audio/fft4g.h" 17 #include "webrtc/common_audio/fft4g.h"
17 #include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h" 18 #include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"
18 #include "webrtc/modules/audio_processing/vad/pitch_internal.h" 19 #include "webrtc/modules/audio_processing/vad/pitch_internal.h"
19 #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" 20 #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
20 extern "C" { 21 extern "C" {
21 #include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" 22 #include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
22 #include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" 23 #include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
23 #include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" 24 #include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
24 #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" 25 #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
25 } 26 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 // classification. 89 // classification.
89 if (high_pass_filter_->Filter(frame, kNumSubframeSamples, 90 if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
90 &audio_buffer_[num_buffer_samples_]) != 0) { 91 &audio_buffer_[num_buffer_samples_]) != 0) {
91 return -1; 92 return -1;
92 } 93 }
93 94
94 num_buffer_samples_ += kNumSubframeSamples; 95 num_buffer_samples_ += kNumSubframeSamples;
95 if (num_buffer_samples_ < kBufferLength) { 96 if (num_buffer_samples_ < kBufferLength) {
96 return 0; 97 return 0;
97 } 98 }
98 assert(num_buffer_samples_ == kBufferLength); 99 RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
99 features->num_frames = kNum10msSubframes; 100 features->num_frames = kNum10msSubframes;
100 features->silence = false; 101 features->silence = false;
101 102
102 Rms(features->rms, kMaxNumFrames); 103 Rms(features->rms, kMaxNumFrames);
103 for (size_t i = 0; i < kNum10msSubframes; ++i) { 104 for (size_t i = 0; i < kNum10msSubframes; ++i) {
104 if (features->rms[i] < kSilenceRms) { 105 if (features->rms[i] < kSilenceRms) {
105 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. 106 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
106 // Bail out here instead. 107 // Bail out here instead.
107 features->silence = true; 108 features->silence = true;
108 ResetBuffer(); 109 ResetBuffer();
109 return 0; 110 return 0;
110 } 111 }
111 } 112 }
112 113
113 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, 114 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
114 kMaxNumFrames); 115 kMaxNumFrames);
115 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); 116 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
116 ResetBuffer(); 117 ResetBuffer();
117 return 0; 118 return 0;
118 } 119 }
119 120
120 // Computes |kLpcOrder + 1| correlation coefficients. 121 // Computes |kLpcOrder + 1| correlation coefficients.
121 void VadAudioProc::SubframeCorrelation(double* corr, 122 void VadAudioProc::SubframeCorrelation(double* corr,
122 size_t length_corr, 123 size_t length_corr,
123 size_t subframe_index) { 124 size_t subframe_index) {
124 assert(length_corr >= kLpcOrder + 1); 125 RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
125 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; 126 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
126 size_t buffer_index = subframe_index * kNumSubframeSamples; 127 size_t buffer_index = subframe_index * kNumSubframeSamples;
127 128
128 for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) 129 for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
129 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; 130 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
130 131
131 WebRtcIsac_AutoCorr(corr, windowed_audio, 132 WebRtcIsac_AutoCorr(corr, windowed_audio,
132 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); 133 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
133 } 134 }
134 135
135 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. 136 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
136 // The analysis window is 15 ms long and it is centered on the first half of 137 // The analysis window is 15 ms long and it is centered on the first half of
137 // each 10 ms subframe. This is equivalent to computing LPC coefficients for the 138 // each 10 ms subframe. This is equivalent to computing LPC coefficients for the
138 // first half of each 10 ms subframe. 139 // first half of each 10 ms subframe.
139 void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { 140 void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
140 assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1)); 141 RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
141 double corr[kLpcOrder + 1]; 142 double corr[kLpcOrder + 1];
142 double reflec_coeff[kLpcOrder]; 143 double reflec_coeff[kLpcOrder];
143 for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes; 144 for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
144 i++, offset_lpc += kLpcOrder + 1) { 145 i++, offset_lpc += kLpcOrder + 1) {
145 SubframeCorrelation(corr, kLpcOrder + 1, i); 146 SubframeCorrelation(corr, kLpcOrder + 1, i);
146 corr[0] *= 1.0001; 147 corr[0] *= 1.0001;
147 // This makes Lev-Durb a bit more stable. 148 // This makes Lev-Durb a bit more stable.
148 for (size_t k = 0; k < kLpcOrder + 1; k++) { 149 for (size_t k = 0; k < kLpcOrder + 1; k++) {
149 corr[k] *= kCorrWeight[k]; 150 corr[k] *= kCorrWeight[k];
150 } 151 }
151 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); 152 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
152 } 153 }
153 } 154 }
154 155
155 // Fit a second order curve to these 3 points and find the location of the 156 // Fit a second order curve to these 3 points and find the location of the
156 // extremum. The points are inverted before curve fitting. 157 // extremum. The points are inverted before curve fitting.
157 static float QuadraticInterpolation(float prev_val, 158 static float QuadraticInterpolation(float prev_val,
158 float curr_val, 159 float curr_val,
159 float next_val) { 160 float next_val) {
160 // Doing the interpolation in |1 / A(z)|^2. 161 // Doing the interpolation in |1 / A(z)|^2.
161 float fractional_index = 0; 162 float fractional_index = 0;
162 next_val = 1.0f / next_val; 163 next_val = 1.0f / next_val;
163 prev_val = 1.0f / prev_val; 164 prev_val = 1.0f / prev_val;
164 curr_val = 1.0f / curr_val; 165 curr_val = 1.0f / curr_val;
165 166
166 fractional_index = 167 fractional_index =
167 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); 168 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
168 assert(fabs(fractional_index) < 1); 169 RTC_DCHECK_LT(fabs(fractional_index), 1);
169 return fractional_index; 170 return fractional_index;
170 } 171 }
171 172
172 // 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral envelope 173 // 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral envelope
173 // of the input signal. A local maximum of the spectral envelope corresponds 174 // of the input signal. A local maximum of the spectral envelope corresponds
174 // to a local minimum of A(z). Searching for the minimum of A(z) instead saves 175 // to a local minimum of A(z). Searching for the minimum of A(z) instead saves
175 // complexity, as we save one inversion. Furthermore, we find the first local 176 // complexity, as we save one inversion. Furthermore, we find the first local
176 // maximum of magnitude squared, to save on one square root. 177 // maximum of magnitude squared, to save on one square root.
177 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, 178 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
178 size_t length_f_peak) { 179 size_t length_f_peak) {
179 assert(length_f_peak >= kNum10msSubframes); 180 RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
180 double lpc[kNum10msSubframes * (kLpcOrder + 1)]; 181 double lpc[kNum10msSubframes * (kLpcOrder + 1)];
181 // For all sub-frames. 182 // For all sub-frames.
182 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); 183 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
183 184
184 const size_t kNumDftCoefficients = kDftSize / 2 + 1; 185 const size_t kNumDftCoefficients = kDftSize / 2 + 1;
185 float data[kDftSize]; 186 float data[kDftSize];
186 187
187 for (size_t i = 0; i < kNum10msSubframes; i++) { 188 for (size_t i = 0; i < kNum10msSubframes; i++) {
188 // Convert to float with zero pad. 189 // Convert to float with zero pad.
189 memset(data, 0, sizeof(data)); 190 memset(data, 0, sizeof(data));
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
225 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; 226 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
226 } 227 }
227 } 228 }
228 229
229 // Using iSAC functions to estimate pitch gains & lags. 230 // Using iSAC functions to estimate pitch gains & lags.
230 void VadAudioProc::PitchAnalysis(double* log_pitch_gains, 231 void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
231 double* pitch_lags_hz, 232 double* pitch_lags_hz,
232 size_t length) { 233 size_t length) {
233 // TODO(turajs): This can be "imported" from iSAC, along with the next two 234 // TODO(turajs): This can be "imported" from iSAC, along with the next two
234 // constants. 235 // constants.
235 assert(length >= kNum10msSubframes); 236 RTC_DCHECK_GE(length, kNum10msSubframes);
236 const int kNumPitchSubframes = 4; 237 const int kNumPitchSubframes = 4;
237 double gains[kNumPitchSubframes]; 238 double gains[kNumPitchSubframes];
238 double lags[kNumPitchSubframes]; 239 double lags[kNumPitchSubframes];
239 240
240 const int kNumSubbandFrameSamples = 240; 241 const int kNumSubbandFrameSamples = 240;
241 const int kNumLookaheadSamples = 24; 242 const int kNumLookaheadSamples = 24;
242 243
243 float lower[kNumSubbandFrameSamples]; 244 float lower[kNumSubbandFrameSamples];
244 float upper[kNumSubbandFrameSamples]; 245 float upper[kNumSubbandFrameSamples];
245 double lower_lookahead[kNumSubbandFrameSamples]; 246 double lower_lookahead[kNumSubbandFrameSamples];
246 double upper_lookahead[kNumSubbandFrameSamples]; 247 double upper_lookahead[kNumSubbandFrameSamples];
247 double lower_lookahead_pre_filter[kNumSubbandFrameSamples + 248 double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
248 kNumLookaheadSamples]; 249 kNumLookaheadSamples];
249 250
250 // Split signal to lower and upper bands 251 // Split signal to lower and upper bands
251 WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower, 252 WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
252 upper, lower_lookahead, upper_lookahead, 253 upper, lower_lookahead, upper_lookahead,
253 pre_filter_handle_.get()); 254 pre_filter_handle_.get());
254 WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter, 255 WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
255 pitch_analysis_handle_.get(), lags, gains); 256 pitch_analysis_handle_.get(), lags, gains);
256 257
257 // Lags are computed on lower-band signal with sampling rate half of the 258 // Lags are computed on lower-band signal with sampling rate half of the
258 // input signal. 259 // input signal.
259 GetSubframesPitchParameters( 260 GetSubframesPitchParameters(
260 kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes, 261 kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
261 &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz); 262 &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
262 } 263 }
263 264
264 void VadAudioProc::Rms(double* rms, size_t length_rms) { 265 void VadAudioProc::Rms(double* rms, size_t length_rms) {
265 assert(length_rms >= kNum10msSubframes); 266 RTC_DCHECK_GE(length_rms, kNum10msSubframes);
266 size_t offset = kNumPastSignalSamples; 267 size_t offset = kNumPastSignalSamples;
267 for (size_t i = 0; i < kNum10msSubframes; i++) { 268 for (size_t i = 0; i < kNum10msSubframes; i++) {
268 rms[i] = 0; 269 rms[i] = 0;
269 for (size_t n = 0; n < kNumSubframeSamples; n++, offset++) 270 for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
270 rms[i] += audio_buffer_[offset] * audio_buffer_[offset]; 271 rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
271 rms[i] = sqrt(rms[i] / kNumSubframeSamples); 272 rms[i] = sqrt(rms[i] / kNumSubframeSamples);
272 } 273 }
273 } 274 }
274 275
275 } // namespace webrtc 276 } // namespace webrtc
OLDNEW
« no previous file with comments | « webrtc/modules/audio_processing/vad/vad_audio_proc.h ('k') | webrtc/modules/audio_processing/vad/vad_circular_buffer.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698