OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" | |
12 | |
13 #include <algorithm> | |
14 | |
15 #include "webrtc/base/checks.h" | |
16 | |
17 namespace webrtc { | |
namespace {

// Audio-format limits.
// Upper bound, in samples, on the chunk length ProcessChunk() accepts.
const int kMaxLength = 320;
// Channel count handed to the resampler.
const int kNumChannels = 1;

// Voice-probability presets.
// Written to a chunk's probabilities when the extracted features are flagged
// as silence.
const double kLowProbability = 0.01;
// Starting value for a chunk's probabilities before the VAD stages refine
// them.
const double kNeutralProbability = 0.5;
// Value of |last_voice_probability_| before any chunk has been processed.
const double kDefaultVoiceValue = 1.0;

}  // namespace
28 | |
29 VoiceActivityDetector::VoiceActivityDetector() | |
30 : last_voice_probability_(kDefaultVoiceValue), | |
31 // Initialize to the most common resampling situation. | |
32 resampler_(kMaxLength, kLength10Ms, kNumChannels), | |
33 standalone_vad_(StandaloneVad::Create()) { | |
34 } | |
35 | |
36 // Because ISAC has a different chunk length, it updates | |
37 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. | |
38 // Otherwise it clears them. | |
39 void VoiceActivityDetector::ProcessChunk(const int16_t* audio, | |
40 int length, | |
41 int sample_rate_hz) { | |
42 DCHECK_EQ(length, sample_rate_hz / 100); | |
43 DCHECK_LE(length, kMaxLength); | |
44 // Resample to the required rate. | |
45 const int16_t* resampled_ptr = audio; | |
46 if (sample_rate_hz != kSampleRateHz) { | |
47 CHECK_EQ( | |
48 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), | |
49 0); | |
50 resampler_.Push(audio, length, resampled_, kLength10Ms, length); | |
51 resampled_ptr = resampled_; | |
52 } | |
53 DCHECK_EQ(length, kLength10Ms); | |
54 | |
55 // Each chunk needs to be passed into |standalone_vad_|, because internally it | |
56 // buffers the audio and processes it all at once when GetActivity() is | |
57 // called. | |
58 CHECK_EQ(standalone_vad_->AddAudio(audio, length), 0); | |
59 | |
60 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); | |
61 | |
62 chunkwise_voice_probabilities_.resize(features_.num_frames); | |
63 chunkwise_rms_.resize(features_.num_frames); | |
64 std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), | |
65 chunkwise_rms_.begin()); | |
66 if (features_.num_frames > 0) { | |
67 if (features_.silence) { | |
68 // The other features are invalid, so set the voice probabilities to an | |
69 // arbitrary low value. | |
70 std::fill(chunkwise_voice_probabilities_.begin(), | |
71 chunkwise_voice_probabilities_.end(), kLowProbability); | |
72 } else { | |
73 std::fill(chunkwise_voice_probabilities_.begin(), | |
74 chunkwise_voice_probabilities_.end(), kNeutralProbability); | |
75 CHECK_GE( | |
76 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], | |
77 chunkwise_voice_probabilities_.size()), | |
78 0); | |
79 CHECK_GE(pitch_based_vad_.VoicingProbability( | |
80 features_, &chunkwise_voice_probabilities_[0]), | |
81 0); | |
82 } | |
83 last_voice_probability_ = chunkwise_voice_probabilities_.back(); | |
84 } | |
85 } | |
86 | |
87 } // namespace webrtc | |
OLD | NEW |