Index: webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..bf4da684d8ff76c9c74847ab3898554e8fcb0f57 |
--- /dev/null |
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc |
@@ -0,0 +1,76 @@ |
+/* |
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
+ * |
+ * Use of this source code is governed by a BSD-style license |
+ * that can be found in the LICENSE file in the root of the source |
+ * tree. An additional intellectual property rights grant can be found |
+ * in the file PATENTS. All contributing project authors may |
+ * be found in the AUTHORS file in the root of the source tree. |
+ */ |
+ |
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" |
+ |
+#include "webrtc/base/checks.h" |
+ |
+namespace webrtc { |
+namespace { |
+ |
+// Largest chunk length that ProcessCaptureAudio() accepts (enforced there with |
+// CHECK_LE); also the input-side setting the resampler is constructed with. |
+const int kMaxLength = 320; |
+// Channel count handed to the resampler whenever it is created or reset. |
+const int kNumChannels = 1; |
bloch
2015/06/12 23:34:38
Just wondering, is there a VAD implementation that
aluebs-webrtc
2015/06/16 01:17:52
Not right now, and I don't think we will need it,
|
+ |
+// Value of last_voice_probability_ until the first frame has been processed. |
+const double kDefaultVoiceValue = 1.0; |
+// Starting value of every per-frame probability before the two VADs are run |
+// on a non-silent chunk. |
+const double kNeutralProbability = 0.5; |
+// Probability assigned to every frame when the extracted features are flagged |
+// as silence. |
+const double kLowProbability = 0.01; |
+ |
+} // namespace |
+ |
+// Creates a detector that reports kDefaultVoiceValue as the last voice |
+// probability until audio has been processed. |
+VoiceActivityDetector::VoiceActivityDetector() |
+    : last_voice_probability_(kDefaultVoiceValue), |
+      // Preconfigure the resampler for the kMaxLength -> kLength10Ms case, |
+      // which is the most common resampling situation. |
+      resampler_(kMaxLength, kLength10Ms, kNumChannels), |
+      standalone_vad_(StandaloneVad::Create()) {} |
+ |
+// Processes one chunk of capture audio and refreshes the per-frame results |
+// (chunkwise_voice_probabilities_, chunkwise_rms_) as well as |
+// last_voice_probability_. |
+// |
+// audio:  input samples of the chunk. |
+// length: number of samples in audio; must not exceed kMaxLength. A chunk |
+//         whose length differs from kLength10Ms is resampled to kLength10Ms |
+//         samples before any further processing. |
+// |
+// Failures reported by the resampler or by either VAD abort via CHECK. |
+void VoiceActivityDetector::ProcessCaptureAudio(const int16_t* audio, |
+                                                int length) { |
+  CHECK_LE(length, kMaxLength); |
+  // Resample to the required rate. |
+  const int16_t* resampled_ptr = audio; |
+  if (length != kLength10Ms) { |
Andrew MacDonald
2015/06/15 04:32:25
This assumes that chunks of 10 ms are passed in. I
aluebs-webrtc
2015/06/16 01:17:52
Agreed.
|
+    CHECK_EQ(resampler_.ResetIfNeeded(length, kLength10Ms, kNumChannels), 0); |
+    // Push() writes the number of produced samples back into length, so from |
+    // here on length describes resampled_ and no longer audio. |
+    CHECK_EQ(resampler_.Push(audio, length, resampled_, kLength10Ms, length), |
+             0); |
+    resampled_ptr = resampled_; |
+  } |
+  DCHECK_EQ(length, kLength10Ms); |
Andrew MacDonald
2015/06/15 04:32:25
DCHECK_EQ
aluebs-webrtc
2015/06/16 01:17:52
Done.
|
+ |
+  // Every chunk has to be handed to standalone_vad_, not only the ones for |
+  // which probabilities are computed further down; it is expected to keep the |
+  // audio until GetActivity() is called. |
+  // The resampled buffer must be used here: length counts the samples in |
+  // resampled_ptr, so pairing it with audio would describe the wrong data. |
+  CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); |
+ |
+  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); |
+ |
+  chunkwise_voice_probabilities_.clear(); |
Andrew MacDonald
2015/06/15 04:32:25
minor: It might be overall more clear if you resiz
aluebs-webrtc
2015/06/16 01:17:52
But the point in clearing here is that it returns
Andrew MacDonald
2015/06/16 22:18:31
It will work, assuming features_.num_frames == 0 w
aluebs-webrtc
2015/06/17 01:44:31
Good point. I agree it is clearer, so I changed it
|
+  chunkwise_rms_.clear(); |
+  if (features_.num_frames > 0) { |
+    if (features_.silence) { |
+      // The other features are invalid, so set the voice probabilities to an |
+      // arbitrary low value. |
+      chunkwise_voice_probabilities_.assign(features_.num_frames, |
+                                            kLowProbability); |
+    } else { |
+      chunkwise_voice_probabilities_.assign(features_.num_frames, |
+                                            kNeutralProbability); |
Andrew MacDonald
2015/06/15 04:32:25
Move the call to standalone_vad_->AddAudio to here
aluebs-webrtc
2015/06/16 01:17:52
But here it will not be run on each chunk, which i
Andrew MacDonald
2015/06/16 22:18:31
Ah OK. Can you put a comment to that effect on the
aluebs-webrtc
2015/06/17 01:44:31
Done.
|
+      // Report the real capacity of the output buffer, which was just sized |
+      // to features_.num_frames entries, so the VAD cannot write past its end. |
+      CHECK_GE(standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], |
+                                            features_.num_frames), |
Andrew MacDonald
2015/06/15 04:32:25
I'm worried about this call. You're only guarantee
aluebs-webrtc
2015/06/16 01:17:52
That is an excellent point! I think in practice th
Andrew MacDonald
2015/06/16 22:18:31
Where is kMaxNumFrames defined? Can you remove it?
aluebs-webrtc
2015/06/17 01:44:30
It is defined in vad/common.h and used to define t
|
+               0); |
+      CHECK_GE(pitch_based_vad_.VoicingProbability( |
+                   features_, &chunkwise_voice_probabilities_[0]), |
+               0); |
+    } |
+    // Non-empty here: the vector was assigned features_.num_frames (> 0) |
+    // entries in both branches above. |
+    last_voice_probability_ = chunkwise_voice_probabilities_.back(); |
Andrew MacDonald
2015/06/15 04:32:25
chunkwise_voice_probabilities_.last()
aluebs-webrtc
2015/06/16 01:17:52
Done.
|
+  } |
+  // Copy the per-frame RMS values; nothing is copied when num_frames is 0. |
+  for (int i = 0; i < features_.num_frames; ++i) { |
+    chunkwise_rms_.push_back(features_.rms[i]); |
+  } |
+} |
+ |
+} // namespace webrtc |