OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" | |
12 | |
13 #include "webrtc/base/checks.h" | |
14 | |
15 namespace webrtc { | |
16 namespace { | |
17 | |
18 const int kMaxLength = 320; | |
19 const int kNumChannels = 1; | |
bloch
2015/06/12 23:34:38
Just wondering, is there a VAD implementation that
aluebs-webrtc
2015/06/16 01:17:52
Not right now, and I don't think we will need it,
| |
20 | |
21 const double kDefaultVoiceValue = 1.0; | |
22 const double kNeutralProbability = 0.5; | |
23 const double kLowProbability = 0.01; | |
24 | |
25 } // namespace | |
26 | |
27 VoiceActivityDetector::VoiceActivityDetector() | |
28 : last_voice_probability_(kDefaultVoiceValue), | |
29 // Initialize to the most common resampling situation. | |
30 resampler_(kMaxLength, kLength10Ms, kNumChannels), | |
31 standalone_vad_(StandaloneVad::Create()) { | |
32 } | |
33 | |
34 void VoiceActivityDetector::ProcessCaptureAudio(const int16_t* audio, | |
35 int length) { | |
36 CHECK_LE(length, kMaxLength); | |
37 // Resample to the required rate. | |
38 const int16_t* resampled_ptr = audio; | |
39 if (length != kLength10Ms) { | |
Andrew MacDonald
2015/06/15 04:32:25
This assumes that chunks of 10 ms are passed in. I
aluebs-webrtc
2015/06/16 01:17:52
Agreed.
| |
40 CHECK_EQ(resampler_.ResetIfNeeded(length, kLength10Ms, kNumChannels), 0); | |
41 resampler_.Push(audio, length, resampled_, kLength10Ms, length); | |
42 resampled_ptr = resampled_; | |
43 } | |
44 assert(length == kLength10Ms); | |
Andrew MacDonald
2015/06/15 04:32:25
DCHECK_EQ
aluebs-webrtc
2015/06/16 01:17:52
Done.
| |
45 | |
46 CHECK_EQ(standalone_vad_->AddAudio(audio, length), 0); | |
47 | |
48 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); | |
49 | |
50 chunkwise_voice_probabilities_.clear(); | |
Andrew MacDonald
2015/06/15 04:32:25
minor: It might be overall more clear if you resiz
aluebs-webrtc
2015/06/16 01:17:52
But the point in clearing here is that it returns
Andrew MacDonald
2015/06/16 22:18:31
It will work, assuming features_.num_frames == 0 w
aluebs-webrtc
2015/06/17 01:44:31
Good point. I agree it is clearer, so I changed it
| |
51 chunkwise_rms_.clear(); | |
52 if (features_.num_frames > 0) { | |
53 if (features_.silence) { | |
54 // The other features are invalid, so set the voice probabilities to an | |
55 // arbitrary low value. | |
56 chunkwise_voice_probabilities_.assign(features_.num_frames, | |
57 kLowProbability); | |
58 } else { | |
59 chunkwise_voice_probabilities_.assign(features_.num_frames, | |
60 kNeutralProbability); | |
Andrew MacDonald
2015/06/15 04:32:25
Move the call to standalone_vad_->AddAudio to here
aluebs-webrtc
2015/06/16 01:17:52
But here it will not be run on each chunk, which i
Andrew MacDonald
2015/06/16 22:18:31
Ah OK. Can you put a comment to that effect on the
aluebs-webrtc
2015/06/17 01:44:31
Done.
| |
61 CHECK_GE(standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], | |
62 kMaxNumFrames), | |
Andrew MacDonald
2015/06/15 04:32:25
I'm worried about this call. You're only guarantee
aluebs-webrtc
2015/06/16 01:17:52
That is an excellent point! I think in practice th
Andrew MacDonald
2015/06/16 22:18:31
Where is kMaxNumFrames defined? Can you remove it?
aluebs-webrtc
2015/06/17 01:44:30
It is defined in vad/common.h and used to define t
| |
63 0); | |
64 CHECK_GE(pitch_based_vad_.VoicingProbability( | |
65 features_, &chunkwise_voice_probabilities_[0]), | |
66 0); | |
67 } | |
68 last_voice_probability_ = | |
69 chunkwise_voice_probabilities_[features_.num_frames - 1]; | |
Andrew MacDonald
2015/06/15 04:32:25
chunkwise_voice_probabilities_.last()
aluebs-webrtc
2015/06/16 01:17:52
Done.
| |
70 } | |
71 for (int i = 0; i < features_.num_frames; ++i) { | |
72 chunkwise_rms_.push_back(features_.rms[i]); | |
73 } | |
74 } | |
75 | |
76 } // namespace webrtc | |
OLD | NEW |