OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" | |
12 | |
13 #include <algorithm> | |
14 #include <vector> | |
15 | |
16 #include "testing/gtest/include/gtest/gtest.h" | |
17 #include "webrtc/test/testsupport/fileutils.h" | |
18 | |
19 namespace webrtc { | |
20 namespace { | |
21 | |
// Offset, in seconds, into the speech recordings at which the speech tests
// start reading samples.
const int kStartTimeSec = 16;
// The mean voice probability measured on speech must exceed this value.
const float kMeanSpeechProbability = 0.3f;
// The maximum voice probability measured on noise must stay below this value.
const float kMaxNoiseProbability = 0.05f;
// Number of generated noise chunks fed to the detector by the noise tests.
const size_t kNumChunks = 100;
Andrew MacDonald
2015/06/18 02:02:51
Is it worthwhile making this a bit longer?
aluebs-webrtc
2015/06/18 16:32:39
Sure, why not. Now it has approximately the same a
| |
// Number of chunks per ISAC block. The noise tests ignore the reported voice
// probability while the chunk index is not greater than this value, because
// the detector still returns its default value until a block was processed.
const size_t kNumChunksPerIsacBlock = 3u;
27 | |
// Overwrites every element of |data| with a pseudo-random 16-bit sample drawn
// from std::rand(). The sequence depends on the current std::srand() seed. An
// empty vector is left untouched.
void GenerateNoise(std::vector<int16_t>* data) {
  for (size_t i = 0; i < data->size(); ++i) {
    // std::rand() returns an int between 0 and RAND_MAX, which may not fit in
    // an int16_t. The value is deliberately narrowed: out-of-range results
    // simply land on some other sample value, which is fine for noise. The
    // explicit cast documents that the truncation is intended.
    (*data)[i] = static_cast<int16_t>(std::rand());
  }
}
35 | |
36 } // namespace | |
37 | |
38 TEST(VoiceActivityDetectorTest, ConstructorSetsDefaultValues) { | |
39 const float kDefaultVoiceValue = 1.f; | |
40 | |
41 VoiceActivityDetector vad; | |
42 | |
43 std::vector<double> p = vad.chunkwise_voice_probabilities(); | |
44 std::vector<double> rms = vad.chunkwise_rms(); | |
45 | |
46 EXPECT_EQ(p.size(), 0u); | |
47 EXPECT_EQ(rms.size(), 0u); | |
48 | |
49 EXPECT_FLOAT_EQ(vad.last_voice_probability(), kDefaultVoiceValue); | |
50 } | |
51 | |
52 TEST(VoiceActivityDetectorTest, Speech16kHzHasHighVoiceProbabilities) { | |
53 VoiceActivityDetector vad; | |
54 | |
55 std::vector<int16_t> data(kLength10Ms); | |
56 float mean_probability = 0.f; | |
57 | |
58 FILE* pcm_file = | |
59 fopen(test::ResourcePath("audio_processing/transient/audio16kHz", "pcm") | |
60 .c_str(), | |
61 "rb"); | |
62 ASSERT_TRUE(pcm_file != NULL); | |
63 ASSERT_EQ(fseek(pcm_file, | |
64 kStartTimeSec * kSampleRateHz * sizeof(data[0]), | |
65 SEEK_SET), 0); | |
66 | |
67 size_t num_chunks = 0; | |
68 while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) == | |
69 data.size()) { | |
70 vad.ProcessChunk(&data[0], data.size(), kSampleRateHz); | |
71 | |
72 mean_probability += vad.last_voice_probability(); | |
73 | |
74 ++num_chunks; | |
75 } | |
76 | |
77 mean_probability /= num_chunks; | |
78 | |
79 EXPECT_GT(mean_probability, kMeanSpeechProbability); | |
80 } | |
81 | |
82 TEST(VoiceActivityDetectorTest, Speech32kHzHasHighVoiceProbabilities) { | |
83 VoiceActivityDetector vad; | |
84 | |
85 std::vector<int16_t> data(2 * kLength10Ms); | |
Andrew MacDonald
2015/06/18 02:02:51
Same here, don't use 2 *.
aluebs-webrtc
2015/06/18 16:32:39
Done.
| |
86 float mean_probability = 0.f; | |
87 | |
88 FILE* pcm_file = | |
89 fopen(test::ResourcePath("audio_processing/transient/audio32kHz", "pcm") | |
90 .c_str(), | |
91 "rb"); | |
92 ASSERT_TRUE(pcm_file != NULL); | |
Andrew MacDonald
2015/06/18 02:02:51
nullptr
aluebs-webrtc
2015/06/18 16:32:39
Done.
| |
93 ASSERT_EQ(fseek(pcm_file, | |
Andrew MacDonald
2015/06/18 02:02:51
I think the fseek's OK, but add a comment explaini
aluebs-webrtc
2015/06/18 16:32:39
Of course. Done.
| |
94 kStartTimeSec * 2 * kSampleRateHz * sizeof(data[0]), | |
Andrew MacDonald
2015/06/18 02:02:51
Don't use 2 * kSampleRateHz. Add a new explicit sa
aluebs-webrtc
2015/06/18 16:32:39
They were coming from vad/common.h, but I agree th
| |
95 SEEK_SET), 0); | |
96 | |
97 size_t num_chunks = 0; | |
98 while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) == | |
99 data.size()) { | |
100 vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz); | |
101 | |
102 mean_probability += vad.last_voice_probability(); | |
103 | |
104 ++num_chunks; | |
105 } | |
106 | |
107 mean_probability /= num_chunks; | |
108 | |
109 EXPECT_GT(mean_probability, kMeanSpeechProbability); | |
110 } | |
111 | |
112 TEST(VoiceActivityDetectorTest, Noise16kHzHasLowVoiceProbabilities) { | |
113 VoiceActivityDetector vad; | |
114 | |
115 std::vector<int16_t> data(kLength10Ms); | |
116 float max_probability = 0.f; | |
117 | |
118 std::srand(42); | |
119 | |
120 for (size_t i = 0; i < kNumChunks; ++i) { | |
121 GenerateNoise(&data); | |
122 | |
123 vad.ProcessChunk(&data[0], data.size(), kSampleRateHz); | |
124 | |
125 // Before the |vad has enough data to process an ISAC block it will return | |
126 // the default value, 1.f, which would ruin the |max_probability| value. | |
127 if (i > kNumChunksPerIsacBlock) { | |
128 max_probability = std::max(max_probability, vad.last_voice_probability()); | |
129 } | |
130 } | |
131 | |
132 EXPECT_LT(max_probability, kMaxNoiseProbability); | |
133 } | |
134 | |
135 TEST(VoiceActivityDetectorTest, Noise32kHzHasLowVoiceProbabilities) { | |
136 VoiceActivityDetector vad; | |
137 | |
138 std::vector<int16_t> data(2 * kLength10Ms); | |
139 float max_probability = 0.f; | |
140 | |
141 std::srand(42); | |
142 | |
143 for (size_t i = 0; i < kNumChunks; ++i) { | |
144 GenerateNoise(&data); | |
145 | |
146 vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz); | |
147 | |
148 // Before the |vad has enough data to process an ISAC block it will return | |
149 // the default value, 1.f, which would ruin the |max_probability| value. | |
150 if (i > kNumChunksPerIsacBlock) { | |
151 max_probability = std::max(max_probability, vad.last_voice_probability()); | |
152 } | |
153 } | |
154 | |
155 EXPECT_LT(max_probability, kMaxNoiseProbability); | |
156 } | |
157 | |
158 } // namespace webrtc | |
OLD | NEW |