Chromium Code Reviews| Index: webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
| diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..fdce34f1e81bc09a6e0bce8edca6f548a0db7f10 |
| --- /dev/null |
| +++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
| @@ -0,0 +1,83 @@ |
| +/* |
| + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| + |
| +#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" |
| + |
| +#include "webrtc/base/checks.h" |
| + |
| +namespace webrtc { |
| +namespace { |
| + |
| +const int kMaxLength = 320; /* upper bound DCHECKed on ProcessChunk()'s |length| */ |
| +const int kNumChannels = 1; /* channel count handed to |resampler_| */ |
| + |
| +const double kDefaultVoiceValue = 1.0; /* initial |last_voice_probability_| */ |
| +const double kNeutralProbability = 0.5; /* fill value before the VADs are queried */ |
| +const double kLowProbability = 0.01; /* fill value when the features flag silence */ |
| + |
| +} // namespace |
| + |
| +VoiceActivityDetector::VoiceActivityDetector() |
| + : last_voice_probability_(kDefaultVoiceValue), /* kept until ProcessChunk() sees num_frames > 0 */ |
| + // Initialize to the most common resampling situation. |
| + resampler_(kMaxLength, kLength10Ms, kNumChannels), /* NOTE(review): these look like sample counts, while ResetIfNeeded() is given rates in Hz -- confirm */ |
| + standalone_vad_(StandaloneVad::Create()) { |
| +} |
| + |
| +// Because ISAC has a different chunk length, it updates |
| +// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. |
| +// Otherwise it clears them. |
|
Andrew MacDonald
2015/06/16 22:18:32
Else -> Otherwise
aluebs-webrtc
2015/06/17 01:44:31
Done.
|
| +// Processes one 10 ms, single-channel chunk: resamples it to |kSampleRateHz| |
| +// when the input rate differs, hands it to the standalone VAD, extracts |
| +// features and refreshes |chunkwise_voice_probabilities_| / |chunkwise_rms_|. |
| +// |audio| must hold |length| == |sample_rate_hz| / 100 samples and |length| |
| +// must not exceed kMaxLength. |
| +void VoiceActivityDetector::ProcessChunk(const int16_t* audio, |
| + int length, |
| + int sample_rate_hz) { |
| + DCHECK_EQ(length, sample_rate_hz / 100); |
| + DCHECK_LE(length, kMaxLength); |
| + // Resample to the required rate. |
| + const int16_t* resampled_ptr = audio; |
| + if (sample_rate_hz != kSampleRateHz) { |
| + CHECK_EQ( |
| + resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), |
| + 0); |
| + // |length| is also passed as Push()'s last argument and is expected to equal |
| + // kLength10Ms afterwards (see the DCHECK below), i.e. from here on it counts |
| + // resampled samples. |
| + resampler_.Push(audio, length, resampled_, kLength10Ms, length); |
| + resampled_ptr = resampled_; |
| + } |
| + DCHECK_EQ(length, kLength10Ms); |
| + |
| + // Feed the same samples that are analysed below. |audio| is still at the |
| + // caller's rate, so pairing it with the post-resampling |length| would pass a |
| + // truncated, wrong-rate buffer to the standalone VAD. |
| + CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); |
| + |
| + audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); |
| + |
| + // The chunkwise outputs only describe the current call: start empty and |
| + // refill them only if feature extraction reported frames. |
| + chunkwise_voice_probabilities_.clear(); |
| + chunkwise_rms_.clear(); |
| + if (features_.num_frames > 0) { |
| + if (features_.silence) { |
| + // The other features are invalid, so set the voice probabilities to an |
| + // arbitrary low value. |
| + chunkwise_voice_probabilities_.assign(features_.num_frames, |
| + kLowProbability); |
| + } else { |
| + // Start every frame at a neutral value; both VADs then receive a pointer |
| + // to the vector's storage. |
| + chunkwise_voice_probabilities_.assign(features_.num_frames, |
| + kNeutralProbability); |
| + CHECK_GE( |
| + standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], |
| + chunkwise_voice_probabilities_.size()), |
| + 0); |
| + CHECK_GE(pitch_based_vad_.VoicingProbability( |
| + features_, &chunkwise_voice_probabilities_[0]), |
| + 0); |
| + } |
| + // Remember the most recent frame's probability across calls. |
| + last_voice_probability_ = chunkwise_voice_probabilities_.back(); |
| + } |
| + for (int i = 0; i < features_.num_frames; ++i) { |
| + chunkwise_rms_.push_back(features_.rms[i]); |
| + } |
| +} |
| + |
| +} // namespace webrtc |