Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license | |
| 5 * that can be found in the LICENSE file in the root of the source | |
| 6 * tree. An additional intellectual property rights grant can be found | |
| 7 * in the file PATENTS. All contributing project authors may | |
| 8 * be found in the AUTHORS file in the root of the source tree. | |
| 9 */ | |
| 10 | |
| 11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" | |
| 12 | |
| 13 #include "webrtc/base/checks.h" | |
| 14 | |
| 15 namespace webrtc { | |
| 16 namespace { | |
| 17 // Audio-format values used by the constructor and by ProcessChunk(). | |
| 18 const int kMaxLength = 320;  // Upper bound on samples per chunk; ProcessChunk() DCHECKs it. | |
| 19 const int kNumChannels = 1;  // Channel count handed to the resampler. | |
| 20 // Voice-probability constants. | |
| 21 const double kDefaultVoiceValue = 1.0;  // Initial value of last_voice_probability_. | |
| 22 const double kNeutralProbability = 0.5;  // Pre-fill for the frames of a non-silent chunk before the VADs run. | |
| 23 const double kLowProbability = 0.01;  // Assigned to every frame of a silent chunk. | |
| 24 | |
| 25 } // namespace | |
| 26 | |
| 27 VoiceActivityDetector::VoiceActivityDetector()  // Sets up initial state only; no audio is processed here. | |
| 28 : last_voice_probability_(kDefaultVoiceValue),  // Kept until ProcessChunk() sees a chunk that yields frames. | |
| 29 // Initialize to the most common resampling situation. | |
| 30 resampler_(kMaxLength, kLength10Ms, kNumChannels),  // NOTE(review): these are sample counts, while ProcessChunk() passes rates in Hz to ResetIfNeeded() - confirm which the constructor expects. | |
| 31 standalone_vad_(StandaloneVad::Create()) {  // Obtained from the StandaloneVad::Create() factory. | |
| 32 } | |
| 33 | |
| 34 // Because ISAC has a different chunk length, it updates | |
| 35 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. | |
| 36 // Otherwise it clears them. | |
|
Andrew MacDonald
2015/06/16 22:18:32
Else -> Otherwise
aluebs-webrtc
2015/06/17 01:44:31
Done.
| |
| 37 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,  // Input samples for one chunk. | |
| 38 int length,  // Number of samples in |audio|; must equal sample_rate_hz / 100 and not exceed kMaxLength. | |
| 39 int sample_rate_hz) {  // Rate of |audio|; the chunk is resampled when it differs from kSampleRateHz. | |
| 40 DCHECK_EQ(length, sample_rate_hz / 100); | |
| 41 DCHECK_LE(length, kMaxLength); | |
| 42 // Resample to the required rate. Presumably Push() overwrites |length| (its last argument) with the output sample count; the DCHECK after the branch relies on that. | |
| 43 const int16_t* resampled_ptr = audio; | |
| 44 if (sample_rate_hz != kSampleRateHz) { | |
| 45 CHECK_EQ( | |
| 46 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), | |
| 47 0); | |
| 48 resampler_.Push(audio, length, resampled_, kLength10Ms, length); | |
| 49 resampled_ptr = resampled_; | |
| 50 } | |
| 51 DCHECK_EQ(length, kLength10Ms); | |
| 52 // Feed the resampled chunk to both stages: |length| now describes |resampled_ptr|, not the original |audio| buffer. | |
| 53 CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); | |
| 54 | |
| 55 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); | |
| 56 // Drop the previous results; they are refilled below only if this chunk produced frames. | |
| 57 chunkwise_voice_probabilities_.clear(); | |
| 58 chunkwise_rms_.clear(); | |
| 59 if (features_.num_frames > 0) { | |
| 60 if (features_.silence) { | |
| 61 // The other features are invalid, so set the voice probabilities to an | |
| 62 // arbitrary low value. | |
| 63 chunkwise_voice_probabilities_.assign(features_.num_frames, | |
| 64 kLowProbability); | |
| 65 } else { | |
| 66 chunkwise_voice_probabilities_.assign(features_.num_frames, | |
| 67 kNeutralProbability); | |
| 68 CHECK_GE(  // Both VAD calls receive the pre-filled buffer; a negative return is fatal. | |
| 69 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], | |
| 70 chunkwise_voice_probabilities_.size()), | |
| 71 0); | |
| 72 CHECK_GE(pitch_based_vad_.VoicingProbability( | |
| 73 features_, &chunkwise_voice_probabilities_[0]), | |
| 74 0); | |
| 75 } | |
| 76 last_voice_probability_ = chunkwise_voice_probabilities_.back();  // Remember the newest frame's probability. | |
| 77 } | |
| 78 for (int i = 0; i < features_.num_frames; ++i) {  // Copy one RMS value per extracted frame. | |
| 79 chunkwise_rms_.push_back(features_.rms[i]); | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 } // namespace webrtc | |
| OLD | NEW |