OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" | 11 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" |
12 | 12 |
13 #include <math.h> | 13 #include <math.h> |
14 #include <stdio.h> | 14 #include <stdio.h> |
15 | 15 |
| 16 #include "webrtc/base/checks.h" |
16 #include "webrtc/common_audio/fft4g.h" | 17 #include "webrtc/common_audio/fft4g.h" |
17 #include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h" | 18 #include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h" |
18 #include "webrtc/modules/audio_processing/vad/pitch_internal.h" | 19 #include "webrtc/modules/audio_processing/vad/pitch_internal.h" |
19 #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" | 20 #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" |
20 extern "C" { | 21 extern "C" { |
21 #include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" | 22 #include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" |
22 #include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" | 23 #include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" |
23 #include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" | 24 #include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" |
24 #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" | 25 #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" |
25 } | 26 } |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 // classification. | 89 // classification. |
89 if (high_pass_filter_->Filter(frame, kNumSubframeSamples, | 90 if (high_pass_filter_->Filter(frame, kNumSubframeSamples, |
90 &audio_buffer_[num_buffer_samples_]) != 0) { | 91 &audio_buffer_[num_buffer_samples_]) != 0) { |
91 return -1; | 92 return -1; |
92 } | 93 } |
93 | 94 |
94 num_buffer_samples_ += kNumSubframeSamples; | 95 num_buffer_samples_ += kNumSubframeSamples; |
95 if (num_buffer_samples_ < kBufferLength) { | 96 if (num_buffer_samples_ < kBufferLength) { |
96 return 0; | 97 return 0; |
97 } | 98 } |
98 assert(num_buffer_samples_ == kBufferLength); | 99 RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength); |
99 features->num_frames = kNum10msSubframes; | 100 features->num_frames = kNum10msSubframes; |
100 features->silence = false; | 101 features->silence = false; |
101 | 102 |
102 Rms(features->rms, kMaxNumFrames); | 103 Rms(features->rms, kMaxNumFrames); |
103 for (size_t i = 0; i < kNum10msSubframes; ++i) { | 104 for (size_t i = 0; i < kNum10msSubframes; ++i) { |
104 if (features->rms[i] < kSilenceRms) { | 105 if (features->rms[i] < kSilenceRms) { |
105 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. | 106 // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. |
106 // Bail out here instead. | 107 // Bail out here instead. |
107 features->silence = true; | 108 features->silence = true; |
108 ResetBuffer(); | 109 ResetBuffer(); |
109 return 0; | 110 return 0; |
110 } | 111 } |
111 } | 112 } |
112 | 113 |
113 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, | 114 PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, |
114 kMaxNumFrames); | 115 kMaxNumFrames); |
115 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); | 116 FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); |
116 ResetBuffer(); | 117 ResetBuffer(); |
117 return 0; | 118 return 0; |
118 } | 119 } |
119 | 120 |
120 // Computes |kLpcOrder + 1| correlation coefficients. | 121 // Computes |kLpcOrder + 1| correlation coefficients. |
121 void VadAudioProc::SubframeCorrelation(double* corr, | 122 void VadAudioProc::SubframeCorrelation(double* corr, |
122 size_t length_corr, | 123 size_t length_corr, |
123 size_t subframe_index) { | 124 size_t subframe_index) { |
124 assert(length_corr >= kLpcOrder + 1); | 125 RTC_DCHECK_GE(length_corr, kLpcOrder + 1); |
125 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; | 126 double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; |
126 size_t buffer_index = subframe_index * kNumSubframeSamples; | 127 size_t buffer_index = subframe_index * kNumSubframeSamples; |
127 | 128 |
128 for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) | 129 for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) |
129 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; | 130 windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; |
130 | 131 |
131 WebRtcIsac_AutoCorr(corr, windowed_audio, | 132 WebRtcIsac_AutoCorr(corr, windowed_audio, |
132 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); | 133 kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); |
133 } | 134 } |
134 | 135 |
135 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. | 136 // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. |
136 // The analysis window is 15 ms long and it is centered on the first half of | 137 // The analysis window is 15 ms long and it is centered on the first half of |
137 // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the | 138 // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the |
138 // first half of each 10 ms subframe. | 139 // first half of each 10 ms subframe. |
139 void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { | 140 void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { |
140 assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1)); | 141 RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1)); |
141 double corr[kLpcOrder + 1]; | 142 double corr[kLpcOrder + 1]; |
142 double reflec_coeff[kLpcOrder]; | 143 double reflec_coeff[kLpcOrder]; |
143 for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes; | 144 for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes; |
144 i++, offset_lpc += kLpcOrder + 1) { | 145 i++, offset_lpc += kLpcOrder + 1) { |
145 SubframeCorrelation(corr, kLpcOrder + 1, i); | 146 SubframeCorrelation(corr, kLpcOrder + 1, i); |
146 corr[0] *= 1.0001; | 147 corr[0] *= 1.0001; |
147 // This makes Lev-Durb a bit more stable. | 148 // This makes Lev-Durb a bit more stable. |
148 for (size_t k = 0; k < kLpcOrder + 1; k++) { | 149 for (size_t k = 0; k < kLpcOrder + 1; k++) { |
149 corr[k] *= kCorrWeight[k]; | 150 corr[k] *= kCorrWeight[k]; |
150 } | 151 } |
151 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); | 152 WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); |
152 } | 153 } |
153 } | 154 } |
154 | 155 |
155 // Fit a second order curve to these 3 points and find the location of the | 156 // Fit a second order curve to these 3 points and find the location of the |
156 // extremum. The points are inverted before curve fitting. | 157 // extremum. The points are inverted before curve fitting. |
157 static float QuadraticInterpolation(float prev_val, | 158 static float QuadraticInterpolation(float prev_val, |
158 float curr_val, | 159 float curr_val, |
159 float next_val) { | 160 float next_val) { |
160 // Doing the interpolation in |1 / A(z)|^2. | 161 // Doing the interpolation in |1 / A(z)|^2. |
161 float fractional_index = 0; | 162 float fractional_index = 0; |
162 next_val = 1.0f / next_val; | 163 next_val = 1.0f / next_val; |
163 prev_val = 1.0f / prev_val; | 164 prev_val = 1.0f / prev_val; |
164 curr_val = 1.0f / curr_val; | 165 curr_val = 1.0f / curr_val; |
165 | 166 |
166 fractional_index = | 167 fractional_index = |
167 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); | 168 -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); |
168 assert(fabs(fractional_index) < 1); | 169 RTC_DCHECK_LT(fabs(fractional_index), 1); |
169 return fractional_index; | 170 return fractional_index; |
170 } | 171 } |
171 | 172 |
172 // 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope | 173 // 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope |
173 // of the input signal. The local maximum of the spectral envelope corresponds | 174 // of the input signal. The local maximum of the spectral envelope corresponds |
174 // with the local minimum of A(z). It saves complexity, as we save one | 175 // with the local minimum of A(z). It saves complexity, as we save one |
175 // inversion. Furthermore, we find the first local maximum of magnitude squared, | 176 // inversion. Furthermore, we find the first local maximum of magnitude squared, |
176 // to save on one square root. | 177 // to save on one square root. |
177 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, | 178 void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, |
178 size_t length_f_peak) { | 179 size_t length_f_peak) { |
179 assert(length_f_peak >= kNum10msSubframes); | 180 RTC_DCHECK_GE(length_f_peak, kNum10msSubframes); |
180 double lpc[kNum10msSubframes * (kLpcOrder + 1)]; | 181 double lpc[kNum10msSubframes * (kLpcOrder + 1)]; |
181 // For all sub-frames. | 182 // For all sub-frames. |
182 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); | 183 GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); |
183 | 184 |
184 const size_t kNumDftCoefficients = kDftSize / 2 + 1; | 185 const size_t kNumDftCoefficients = kDftSize / 2 + 1; |
185 float data[kDftSize]; | 186 float data[kDftSize]; |
186 | 187 |
187 for (size_t i = 0; i < kNum10msSubframes; i++) { | 188 for (size_t i = 0; i < kNum10msSubframes; i++) { |
188 // Convert to float with zero pad. | 189 // Convert to float with zero pad. |
189 memset(data, 0, sizeof(data)); | 190 memset(data, 0, sizeof(data)); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
225 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; | 226 f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; |
226 } | 227 } |
227 } | 228 } |
228 | 229 |
// Using iSAC functions to estimate pitch gains & lags.
void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
                                 double* pitch_lags_hz,
                                 size_t length) {
  // TODO(turajs): This can be "imported" from iSAC & and the next two
  // constants.
  RTC_DCHECK_GE(length, kNum10msSubframes);
  const int kNumPitchSubframes = 4;
  double gains[kNumPitchSubframes];
  double lags[kNumPitchSubframes];

  // Buffer sizes expected by the iSAC split filter; presumably 240 samples
  // covers one sub-band frame plus its 24-sample lookahead — confirm against
  // WebRtcIsac_SplitAndFilterFloat.
  const int kNumSubbandFrameSamples = 240;
  const int kNumLookaheadSamples = 24;

  float lower[kNumSubbandFrameSamples];
  float upper[kNumSubbandFrameSamples];
  double lower_lookahead[kNumSubbandFrameSamples];
  double upper_lookahead[kNumSubbandFrameSamples];
  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
                                    kNumLookaheadSamples];

  // Split signal to lower and upper bands
  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
                                 upper, lower_lookahead, upper_lookahead,
                                 pre_filter_handle_.get());
  // Pitch estimation runs on the lower band only; |pitch_analysis_handle_|
  // carries state across calls.
  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
                           pitch_analysis_handle_.get(), lags, gains);

  // Lags are computed on lower-band signal with sampling rate half of the
  // input signal.
  GetSubframesPitchParameters(
      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
}
263 | 264 |
264 void VadAudioProc::Rms(double* rms, size_t length_rms) { | 265 void VadAudioProc::Rms(double* rms, size_t length_rms) { |
265 assert(length_rms >= kNum10msSubframes); | 266 RTC_DCHECK_GE(length_rms, kNum10msSubframes); |
266 size_t offset = kNumPastSignalSamples; | 267 size_t offset = kNumPastSignalSamples; |
267 for (size_t i = 0; i < kNum10msSubframes; i++) { | 268 for (size_t i = 0; i < kNum10msSubframes; i++) { |
268 rms[i] = 0; | 269 rms[i] = 0; |
269 for (size_t n = 0; n < kNumSubframeSamples; n++, offset++) | 270 for (size_t n = 0; n < kNumSubframeSamples; n++, offset++) |
270 rms[i] += audio_buffer_[offset] * audio_buffer_[offset]; | 271 rms[i] += audio_buffer_[offset] * audio_buffer_[offset]; |
271 rms[i] = sqrt(rms[i] / kNumSubframeSamples); | 272 rms[i] = sqrt(rms[i] / kNumSubframeSamples); |
272 } | 273 } |
273 } | 274 } |
274 | 275 |
275 } // namespace webrtc | 276 } // namespace webrtc |
OLD | NEW |