webrtc/modules/audio_processing/vad/vad_audio_proc.cc - Issue 1192863006: Revert "Pull the Voice Activity Detector out from the AGC"

Unified Diff: webrtc/modules/audio_processing/vad/vad_audio_proc.cc

Issue 1192863006: Revert "Pull the Voice Activity Detector out from the AGC" (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « webrtc/modules/audio_processing/vad/vad_audio_proc.h ('k') | webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/vad/vad_audio_proc.cc

diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc

deleted file mode 100644

index e8f27f802de5468ca97e36e3e06ea82ef6d7cf44..0000000000000000000000000000000000000000

--- a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc

+++ /dev/null

@@ -1,274 +0,0 @@

-/*

- *

- * Use of this source code is governed by a BSD-style license

- * that can be found in the LICENSE file in the root of the source

- * tree. An additional intellectual property rights grant can be found

- * in the file PATENTS. All contributing project authors may

- * be found in the AUTHORS file in the root of the source tree.

- */

-#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"

-#include <math.h>

-#include <stdio.h>

-#include "webrtc/common_audio/fft4g.h"

-#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"

-#include "webrtc/modules/audio_processing/vad/pitch_internal.h"

-#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"

-extern "C" {

-#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"

-#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"

-#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"

-#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"

-#include "webrtc/modules/interface/module_common_types.h"

-namespace webrtc {

-// The following structures are declared anonymous in iSAC's structs.h. To

-// forward declare them, we use this derived class trick: each nested type below publicly derives from the global struct of the same name and adds no members.

-struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};  // Type allocated for pitch_analysis_handle_ in the constructor.

-struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};  // Type allocated for pre_filter_handle_ in the constructor.

-static const float kFrequencyResolution =  // Sample rate divided by the DFT size; FindFirstSpectralPeaks multiplies a (fractional) bin index by this to get a frequency.

- kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);

-static const int kSilenceRms = 5;  // ExtractFeatures flags the frame as silence when any sub-frame RMS is below this value.

-// TODO(turajs): Make a Create or Init for VadAudioProc.

-VadAudioProc::VadAudioProc()  // Sets the initial buffer/pitch state, allocates the iSAC handles and the high-pass filter, then runs one FFT call and the iSAC init routines.

- : audio_buffer_(),  // Value-initialized, i.e. the buffer starts zeroed.

- num_buffer_samples_(kNumPastSignalSamples),  // Writing starts after the first kNumPastSignalSamples entries of the buffer.

- log_old_gain_(-2),  // Initial "previous" log pitch gain handed to GetSubframesPitchParameters in PitchAnalysis.

- old_lag_(50), // Arbitrary but valid as pitch-lag (in samples).

- pitch_analysis_handle_(new PitchAnalysisStruct),

- pre_filter_handle_(new PreFiltBankstr),

- high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,  // NOTE(review): the result of Create() is not checked for null here -- confirm Create() cannot fail with these arguments.

- kFilterOrder,

- kCoeffDenominator,

- kFilterOrder)) {

- static_assert(kNumPastSignalSamples + kNumSubframeSamples ==  // The window table must have one entry per sample that SubframeCorrelation multiplies.

- sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),

- "lpc analysis window incorrect size");

- static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),  // One weight per correlation coefficient used in GetLpcPolynomials.

- "correlation weight incorrect size");

- // TODO(turajs): Are we doing too much in the constructor?

- float data[kDftSize];  // Scratch input for the FFT call below. NOTE(review): it is passed to WebRtc_rdft without being initialized -- confirm this is acceptable (e.g. under memory sanitizers).

- // Make FFT to initialize.

- ip_[0] = 0;  // NOTE(review): presumably a zero in ip_[0] makes WebRtc_rdft build its work tables in ip_/w_fft_ -- confirm against fft4g.

- WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);  // The transformed contents of |data| are not used afterwards.

- // TODO(turajs): Need to initialize high-pass filter.

- // Initialize iSAC components.

- WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());

- WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());

-VadAudioProc::~VadAudioProc() {  // No statements are visible here. NOTE(review): presumably the handles are owning smart pointers (they are read through .get()), so nothing is freed by hand -- confirm in the header.

-void VadAudioProc::ResetBuffer() {  // Moves kNumPastSignalSamples samples, starting at index kNumSamplesToProcess, to the front of audio_buffer_ and rewinds the write position to just after them.

- memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],  // NOTE(review): memcpy requires non-overlapping ranges, i.e. kNumSamplesToProcess >= kNumPastSignalSamples -- confirm against the constants in the header.

- sizeof(audio_buffer_[0]) * kNumPastSignalSamples);

- num_buffer_samples_ = kNumPastSignalSamples;  // Same value the constructor starts with.

-int VadAudioProc::ExtractFeatures(const int16_t* frame,  // Appends one high-pass-filtered sub-frame to the internal buffer; once the buffer is full, fills |features| and resets the buffer. Returns 0 on success, -1 if |length| is wrong or filtering fails.

- int length,  // Must equal kNumSubframeSamples.

- AudioFeatures* features) {  // Output; num_frames is always written, the other fields only when the buffer is full.

- features->num_frames = 0;  // Remains 0 unless a full buffer is analysed below.

- if (length != kNumSubframeSamples) {

- return -1;

- }

- // High-pass filter to remove the DC component and very low frequency content.

- // We have experienced that this high-pass filtering improves voice/non-voiced

- // classification.

- if (high_pass_filter_->Filter(frame, kNumSubframeSamples,  // The filter output is written directly into the buffer at the current write position.

- &audio_buffer_[num_buffer_samples_]) != 0) {

- return -1;

- }

- num_buffer_samples_ += kNumSubframeSamples;

- if (num_buffer_samples_ < kBufferLength) {

- return 0;  // Buffer not full yet; features->num_frames is still 0.

- }

- assert(num_buffer_samples_ == kBufferLength);

- features->num_frames = kNum10msSubframes;

- features->silence = false;

- Rms(features->rms, kMaxNumFrames);  // Fills one RMS value per sub-frame.

- for (int i = 0; i < kNum10msSubframes; ++i) {

- if (features->rms[i] < kSilenceRms) {

- // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.

- // Bail out here instead.

- features->silence = true;  // On this path the pitch and spectral-peak fields are not written; num_frames keeps the value set above.

- ResetBuffer();

- return 0;

- }

- PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,

- kMaxNumFrames);

- FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);

- ResetBuffer();  // Prepare the buffer for the next run of sub-frames.

- return 0;

-// Computes |kLpcOrder + 1| correlation coefficients of the windowed audio that starts at sub-frame |subframe_index| in audio_buffer_, writing them to |corr|.

-void VadAudioProc::SubframeCorrelation(double* corr,

- int length_corr,  // Capacity of |corr|; only checked by the assert below.

- int subframe_index) {

- assert(length_corr >= kLpcOrder + 1);

- double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];

- int buffer_index = subframe_index * kNumSubframeSamples;  // First buffer sample covered by the analysis window for this sub-frame.

- for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)

- windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];  // Apply the analysis window sample by sample.

- WebRtcIsac_AutoCorr(corr, windowed_audio,

- kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);

-// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.

-// The analysis window is 15 ms long and it is centered on the first half of

-// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the

-// first half of each 10 ms subframe.

-void VadAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {  // |lpc| receives kNum10msSubframes consecutive groups of kLpcOrder + 1 values; |length_lpc| is only checked by the assert.

- assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));

- double corr[kLpcOrder + 1];

- double reflec_coeff[kLpcOrder];  // Filled by WebRtcIsac_LevDurb but not read afterwards in this function.

- for (int i = 0, offset_lpc = 0; i < kNum10msSubframes;

- i++, offset_lpc += kLpcOrder + 1) {

- SubframeCorrelation(corr, kLpcOrder + 1, i);

- corr[0] *= 1.0001;  // Slightly inflate the zero-lag term before the recursion.

- // This makes Lev-Durb a bit more stable.

- for (int k = 0; k < kLpcOrder + 1; k++) {

- corr[k] *= kCorrWeight[k];  // Per-lag weighting of the correlation coefficients.

- }

- WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);  // Writes this sub-frame's coefficients at its offset in |lpc|.

- }

-// Fit a second order curve to these 3 points and find the location of the

-// extremum. The points are inverted before curve fitting. Returns the extremum's offset, in samples, relative to the middle point.

-static float QuadraticInterpolation(float prev_val,

- float curr_val,

- float next_val) {

- // Doing the interpolation in |1 / A(z)|^2.

- float fractional_index = 0;

- next_val = 1.0f / next_val;  // The parameters are overwritten with their reciprocals.

- prev_val = 1.0f / prev_val;

- curr_val = 1.0f / curr_val;

- fractional_index =  // Vertex of the parabola through (-1, prev), (0, curr), (1, next).

- -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);

- assert(fabs(fractional_index) < 1);  // The extremum is expected to lie strictly between the two neighbours.

- return fractional_index;

-// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope

-// of the input signal. The local maximum of the spectral envelope corresponds

-// with the local minimum of A(z). It saves complexity, as we save one

-// inversion. Furthermore, we find the first local maximum of magnitude squared,

-// to save on one square root. One value per sub-frame is written to |f_peak|.

-void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {

- assert(length_f_peak >= kNum10msSubframes);

- double lpc[kNum10msSubframes * (kLpcOrder + 1)];

- // For all sub-frames.

- GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));

- const int kNumDftCoefficients = kDftSize / 2 + 1;

- float data[kDftSize];

- for (int i = 0; i < kNum10msSubframes; i++) {

- // Convert to float with zero pad.

- memset(data, 0, sizeof(data));

- for (int n = 0; n < kLpcOrder + 1; n++) {

- data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);

- }

- // Transform to frequency domain.

- WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);  // In-place transform; the code below reads bin n as the pair data[2n], data[2n + 1].

- int index_peak = 0;  // Stays 0 when no local minimum is found at all.

- float prev_magn_sqr = data[0] * data[0];  // Bin 0 is read from data[0] alone.

- float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];  // Bin 1.

- float next_magn_sqr;

- bool found_peak = false;

- for (int n = 2; n < kNumDftCoefficients - 1; n++) {  // Slide a three-bin window; |curr| is bin n - 1, |next| is bin n.

- next_magn_sqr =

- data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];

- if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {  // Strict local minimum of |A|^2 at the middle bin.

- found_peak = true;

- index_peak = n - 1;

- break;

- }

- prev_magn_sqr = curr_magn_sqr;

- curr_magn_sqr = next_magn_sqr;

- }

- float fractional_index = 0;

- if (!found_peak) {

- // Checking if |kNumDftCoefficients - 1| is the local minimum.

- next_magn_sqr = data[1] * data[1];  // The last bin is read from data[1] alone.

- if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {  // NOTE(review): after the loop |curr| appears to hold bin kNumDftCoefficients - 2, so the index assigned below may be one bin high -- confirm.

- index_peak = kNumDftCoefficients - 1;

- }

- } else {

- // A peak is found, do a simple quadratic interpolation to get a more

- // accurate estimate of the peak location.

- fractional_index =

- QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);

- }

- f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;  // Convert the (fractional) bin index to a frequency.

- }

-// Using iSAC functions to estimate pitch gains & lags. Results are written to |log_pitch_gains| and |pitch_lags_hz|.

-void VadAudioProc::PitchAnalysis(double* log_pitch_gains,

- double* pitch_lags_hz,

- int length) {  // Capacity of the two output arrays; only checked by the assert below.

- // TODO(turajs): This can be "imported" from iSAC, along with the next two

- // constants.

- assert(length >= kNum10msSubframes);

- const int kNumPitchSubframes = 4;  // Number of gain/lag pairs produced by WebRtcIsac_PitchAnalysis as used here.

- double gains[kNumPitchSubframes];

- double lags[kNumPitchSubframes];

- const int kNumSubbandFrameSamples = 240;

- const int kNumLookaheadSamples = 24;

- float lower[kNumSubbandFrameSamples];  // Written by the band split but not read afterwards in this function.

- float upper[kNumSubbandFrameSamples];  // Written by the band split but not read afterwards in this function.

- double lower_lookahead[kNumSubbandFrameSamples];  // Lower-band signal that is fed to the pitch analysis.

- double upper_lookahead[kNumSubbandFrameSamples];  // Written by the band split but not read afterwards in this function.

- double lower_lookahead_pre_filter[kNumSubbandFrameSamples +  // Output buffer of WebRtcIsac_PitchAnalysis; not read afterwards in this function.

- kNumLookaheadSamples];

- // Split signal to lower and upper bands

- WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,  // Input starts after the first kNumPastSignalSamples entries of the buffer.

- upper, lower_lookahead, upper_lookahead,

- pre_filter_handle_.get());

- WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,

- pitch_analysis_handle_.get(), lags, gains);

- // Lags are computed on lower-band signal with sampling rate half of the

- // input signal.

- GetSubframesPitchParameters(  // Maps the kNumPitchSubframes gain/lag pairs onto kNum10msSubframes outputs; log_old_gain_ and old_lag_ are passed by address so they can be updated.

- kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,

- &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);

-void VadAudioProc::Rms(double* rms, int length_rms) {  // Writes the root-mean-square of each sub-frame in audio_buffer_ to |rms|; |length_rms| is only checked by the assert.

- assert(length_rms >= kNum10msSubframes);

- int offset = kNumPastSignalSamples;  // Skip the first kNumPastSignalSamples entries; |offset| keeps advancing across sub-frames.

- for (int i = 0; i < kNum10msSubframes; i++) {

- rms[i] = 0;

- for (int n = 0; n < kNumSubframeSamples; n++, offset++)

- rms[i] += audio_buffer_[offset] * audio_buffer_[offset];  // Accumulate the sum of squares.

- rms[i] = sqrt(rms[i] / kNumSubframeSamples);  // Mean of the squares, then square root.

- }

-} // namespace webrtc