OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <vector>

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
18 | |
19 namespace webrtc { | |
20 namespace { | |
21 | |
// Offset, in seconds, at which the speech tests start reading the PCM files so
// that the leading silence is skipped.
constexpr int kStartTimeSec = 16;
// Lower bound that the mean voice probability must exceed in the speech tests.
constexpr float kMeanSpeechProbability = 0.3f;
// Upper bound that the maximum voice probability must stay below in the noise
// tests.
constexpr float kMaxNoiseProbability = 0.1f;
// Number of noise chunks fed to the detector in each noise test.
constexpr size_t kNumChunks = 300u;
// The noise tests ignore the reported probability while the chunk index is not
// greater than this value (detector warm-up).
constexpr size_t kNumChunksPerIsacBlock = 3;
27 | |
// Overwrites every element of |data| with a pseudo-random sample drawn from
// std::rand(). The sequence is determined by the most recent std::srand() call.
void GenerateNoise(std::vector<int16_t>* data) {
  for (int16_t& sample : *data) {
    // std::rand() yields an int in [0, RAND_MAX]; narrowing it to 16 bits is
    // intentional, since any wrapped value is still acceptable as noise.
    sample = static_cast<int16_t>(std::rand());
  }
}
35 | |
36 } // namespace | |
37 | |
38 TEST(VoiceActivityDetectorTest, ConstructorSetsDefaultValues) { | |
39 const float kDefaultVoiceValue = 1.f; | |
40 | |
41 VoiceActivityDetector vad; | |
42 | |
43 std::vector<double> p = vad.chunkwise_voice_probabilities(); | |
44 std::vector<double> rms = vad.chunkwise_rms(); | |
45 | |
46 EXPECT_EQ(p.size(), 0u); | |
47 EXPECT_EQ(rms.size(), 0u); | |
48 | |
49 EXPECT_FLOAT_EQ(vad.last_voice_probability(), kDefaultVoiceValue); | |
50 } | |
51 | |
52 TEST(VoiceActivityDetectorTest, Speech16kHzHasHighVoiceProbabilities) { | |
53 const int kSampleRateHz = 16000; | |
54 const int kLength10Ms = kSampleRateHz / 100; | |
55 | |
56 VoiceActivityDetector vad; | |
57 | |
58 std::vector<int16_t> data(kLength10Ms); | |
59 float mean_probability = 0.f; | |
60 | |
61 FILE* pcm_file = | |
62 fopen(test::ResourcePath("audio_processing/transient/audio16kHz", "pcm") | |
63 .c_str(), | |
64 "rb"); | |
65 ASSERT_TRUE(pcm_file != nullptr); | |
66 // The silences in the file are skipped to get a more robust voice probability | |
67 // for speech. | |
68 ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]), | |
69 SEEK_SET), | |
70 0); | |
71 | |
72 size_t num_chunks = 0; | |
73 while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) == | |
74 data.size()) { | |
75 vad.ProcessChunk(&data[0], data.size(), kSampleRateHz); | |
76 | |
77 mean_probability += vad.last_voice_probability(); | |
78 | |
79 ++num_chunks; | |
80 } | |
81 | |
82 mean_probability /= num_chunks; | |
83 | |
84 EXPECT_GT(mean_probability, kMeanSpeechProbability); | |
85 } | |
86 | |
87 TEST(VoiceActivityDetectorTest, Speech32kHzHasHighVoiceProbabilities) { | |
88 const int kSampleRateHz = 32000; | |
89 const int kLength10Ms = kSampleRateHz / 100; | |
90 | |
91 VoiceActivityDetector vad; | |
92 | |
93 std::vector<int16_t> data(kLength10Ms); | |
94 float mean_probability = 0.f; | |
95 | |
96 FILE* pcm_file = | |
97 fopen(test::ResourcePath("audio_processing/transient/audio32kHz", "pcm") | |
98 .c_str(), | |
99 "rb"); | |
100 ASSERT_TRUE(pcm_file != nullptr); | |
101 // The silences in the file are skipped to get a more robust voice probability | |
102 // for speech. | |
103 ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]), | |
104 SEEK_SET), | |
105 0); | |
106 | |
107 size_t num_chunks = 0; | |
108 while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) == | |
109 data.size()) { | |
110 vad.ProcessChunk(&data[0], data.size(), kSampleRateHz); | |
111 | |
112 mean_probability += vad.last_voice_probability(); | |
113 | |
114 ++num_chunks; | |
115 } | |
116 | |
117 mean_probability /= num_chunks; | |
118 | |
119 EXPECT_GT(mean_probability, kMeanSpeechProbability); | |
120 } | |
121 | |
122 TEST(VoiceActivityDetectorTest, Noise16kHzHasLowVoiceProbabilities) { | |
123 VoiceActivityDetector vad; | |
124 | |
125 std::vector<int16_t> data(kLength10Ms); | |
126 float max_probability = 0.f; | |
127 | |
128 std::srand(42); | |
129 | |
130 for (size_t i = 0; i < kNumChunks; ++i) { | |
131 GenerateNoise(&data); | |
132 | |
133 vad.ProcessChunk(&data[0], data.size(), kSampleRateHz); | |
134 | |
135 // Before the |vad has enough data to process an ISAC block it will return | |
136 // the default value, 1.f, which would ruin the |max_probability| value. | |
137 if (i > kNumChunksPerIsacBlock) { | |
138 max_probability = std::max(max_probability, vad.last_voice_probability()); | |
139 } | |
140 } | |
141 | |
142 EXPECT_LT(max_probability, kMaxNoiseProbability); | |
143 } | |
144 | |
145 TEST(VoiceActivityDetectorTest, Noise32kHzHasLowVoiceProbabilities) { | |
146 VoiceActivityDetector vad; | |
147 | |
148 std::vector<int16_t> data(2 * kLength10Ms); | |
149 float max_probability = 0.f; | |
150 | |
151 std::srand(42); | |
152 | |
153 for (size_t i = 0; i < kNumChunks; ++i) { | |
154 GenerateNoise(&data); | |
155 | |
156 vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz); | |
157 | |
158 // Before the |vad has enough data to process an ISAC block it will return | |
159 // the default value, 1.f, which would ruin the |max_probability| value. | |
160 if (i > kNumChunksPerIsacBlock) { | |
161 max_probability = std::max(max_probability, vad.last_voice_probability()); | |
162 } | |
163 } | |
164 | |
165 EXPECT_LT(max_probability, kMaxNoiseProbability); | |
166 } | |
167 | |
168 } // namespace webrtc | |
OLD | NEW |