OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
72 | 72 |
73 class AgcStat { | 73 class AgcStat { |
74 public: | 74 public: |
75 AgcStat() | 75 AgcStat() |
76 : video_index_(0), | 76 : video_index_(0), |
77 activity_threshold_(kDefaultActivityThreshold), | 77 activity_threshold_(kDefaultActivityThreshold), |
78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
79 audio_processing_(new VadAudioProc()), | 79 audio_processing_(new VadAudioProc()), |
80 vad_(new PitchBasedVad()), | 80 vad_(new PitchBasedVad()), |
81 standalone_vad_(StandaloneVad::Create()), | 81 standalone_vad_(StandaloneVad::Create()), |
82 audio_content_fid_(NULL) { | 82 audio_content_fid_(nullptr) { |
83 for (size_t n = 0; n < kMaxNumFrames; n++) | 83 for (size_t n = 0; n < kMaxNumFrames; n++) |
84 video_vad_[n] = 0.5; | 84 video_vad_[n] = 0.5; |
85 } | 85 } |
86 | 86 |
87 ~AgcStat() { | 87 ~AgcStat() { |
88 if (audio_content_fid_ != NULL) { | 88 if (audio_content_fid_ != nullptr) { |
89 fclose(audio_content_fid_); | 89 fclose(audio_content_fid_); |
90 } | 90 } |
91 } | 91 } |
92 | 92 |
93 void set_audio_content_file(FILE* audio_content_fid) { | 93 void set_audio_content_file(FILE* audio_content_fid) { |
94 audio_content_fid_ = audio_content_fid; | 94 audio_content_fid_ = audio_content_fid; |
95 } | 95 } |
96 | 96 |
97 int AddAudio(const AudioFrame& frame, double p_video, | 97 int AddAudio(const AudioFrame& frame, double p_video, |
98 int* combined_vad) { | 98 int* combined_vad) { |
(...skipping 23 matching lines...) Expand all Loading... |
122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); | 122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); |
123 p[n] = p_active / (p_active + p_passive); | 123 p[n] = p_active / (p_active + p_passive); |
124 // Limit probabilities. | 124 // Limit probabilities. |
125 p[n] = std::min(std::max(p[n], 0.01), 0.99); | 125 p[n] = std::min(std::max(p[n], 0.01), 0.99); |
126 } | 126 } |
127 if (vad_->VoicingProbability(features, p) < 0) | 127 if (vad_->VoicingProbability(features, p) < 0) |
128 return -1; | 128 return -1; |
129 for (size_t n = 0; n < features.num_frames; n++) { | 129 for (size_t n = 0; n < features.num_frames; n++) { |
130 audio_content_->Update(features.rms[n], p[n]); | 130 audio_content_->Update(features.rms[n], p[n]); |
131 double ac = audio_content_->AudioContent(); | 131 double ac = audio_content_->AudioContent(); |
132 if (audio_content_fid_ != NULL) { | 132 if (audio_content_fid_ != nullptr) { |
133 fwrite(&ac, sizeof(ac), 1, audio_content_fid_); | 133 fwrite(&ac, sizeof(ac), 1, audio_content_fid_); |
134 } | 134 } |
135 if (ac > kAgcAnalWindowSamples * activity_threshold_) { | 135 if (ac > kAgcAnalWindowSamples * activity_threshold_) { |
136 combined_vad[n] = 1; | 136 combined_vad[n] = 1; |
137 } else { | 137 } else { |
138 combined_vad[n] = 0; | 138 combined_vad[n] = 0; |
139 } | 139 } |
140 } | 140 } |
141 video_index_ = 0; | 141 video_index_ = 0; |
142 } | 142 } |
(...skipping 18 matching lines...) Expand all Loading... |
161 std::unique_ptr<StandaloneVad> standalone_vad_; | 161 std::unique_ptr<StandaloneVad> standalone_vad_; |
162 | 162 |
163 FILE* audio_content_fid_; | 163 FILE* audio_content_fid_; |
164 }; | 164 }; |
165 | 165 |
166 | 166 |
167 void void_main(int argc, char* argv[]) { | 167 void void_main(int argc, char* argv[]) { |
168 webrtc::AgcStat agc_stat; | 168 webrtc::AgcStat agc_stat; |
169 | 169 |
170 FILE* pcm_fid = fopen(argv[1], "rb"); | 170 FILE* pcm_fid = fopen(argv[1], "rb"); |
171 ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1]; | 171 ASSERT_TRUE(pcm_fid != nullptr) << "Cannot open PCM file " << argv[1]; |
172 | 172 |
173 if (argc < 2) { | 173 if (argc < 2) { |
174 fprintf(stderr, "\nNot Enough arguments\n"); | 174 fprintf(stderr, "\nNot Enough arguments\n"); |
175 } | 175 } |
176 | 176 |
177 FILE* true_vad_fid = NULL; | 177 FILE* true_vad_fid = nullptr; |
178 ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true " | 178 ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true " |
179 "VADs using --true_vad flag."; | 179 "VADs using --true_vad flag."; |
180 true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb"); | 180 true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb"); |
181 ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " << | 181 ASSERT_TRUE(true_vad_fid != nullptr) << "Cannot open the active list " |
182 FLAGS_true_vad; | 182 << FLAGS_true_vad; |
183 | 183 |
184 FILE* results_fid = NULL; | 184 FILE* results_fid = nullptr; |
185 if (FLAGS_result.size() > 0) { | 185 if (FLAGS_result.size() > 0) { |
186 // True if this is the first time writing to this file, in which case a | 186 // True if this is the first time writing to this file, in which case a |
187 // header is added to the beginning of the file. | 187 // header is added to the beginning of the file. |
188 bool write_header; | 188 bool write_header; |
189 // Open in read mode. If that fails, the file doesn't exist yet and a | 189 // Open in read mode. If that fails, the file doesn't exist yet and a |
190 // header has to be written. Otherwise no header is needed. | 190 // header has to be written. Otherwise no header is needed. |
191 results_fid = fopen(FLAGS_result.c_str(), "r"); | 191 results_fid = fopen(FLAGS_result.c_str(), "r"); |
192 if (results_fid == NULL) { | 192 if (results_fid == nullptr) { |
193 write_header = true; | 193 write_header = true; |
194 } else { | 194 } else { |
195 fclose(results_fid); | 195 fclose(results_fid); |
196 write_header = false; | 196 write_header = false; |
197 } | 197 } |
198 // Open in append mode. | 198 // Open in append mode. |
199 results_fid = fopen(FLAGS_result.c_str(), "a"); | 199 results_fid = fopen(FLAGS_result.c_str(), "a"); |
200 ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " << | 200 ASSERT_TRUE(results_fid != nullptr) << "Cannot open the file, " |
201 FLAGS_result << ", to write the results."; | 201 << FLAGS_result |
| 202 << ", to write the results."; |
202 // Write the header if required. | 203 // Write the header if required. |
203 if (write_header) { | 204 if (write_header) { |
204 fprintf(results_fid, "%% Total Active, Misdetection, " | 205 fprintf(results_fid, "%% Total Active, Misdetection, " |
205 "Total inactive, False Positive, On-sets, Missed segments, " | 206 "Total inactive, False Positive, On-sets, Missed segments, " |
206 "Average response\n"); | 207 "Average response\n"); |
207 } | 208 } |
208 } | 209 } |
209 | 210 |
210 FILE* video_vad_fid = NULL; | 211 FILE* video_vad_fid = nullptr; |
211 if (FLAGS_video_vad.size() > 0) { | 212 if (FLAGS_video_vad.size() > 0) { |
212 video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb"); | 213 video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb"); |
213 ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " << | 214 ASSERT_TRUE(video_vad_fid != nullptr) |
214 FLAGS_video_vad << " to read video-based VAD decisions.\n"; | 215 << "Cannot open the file, " << FLAGS_video_vad |
| 216 << " to read video-based VAD decisions.\n"; |
215 } | 217 } |
216 | 218 |
217 // AgcStat will be the owner of this file and will close it in its | 219 // AgcStat will be the owner of this file and will close it in its |
218 // destructor. | 220 // destructor. |
219 FILE* audio_content_fid = NULL; | 221 FILE* audio_content_fid = nullptr; |
220 if (FLAGS_audio_content.size() > 0) { | 222 if (FLAGS_audio_content.size() > 0) { |
221 audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb"); | 223 audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb"); |
222 ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " << | 224 ASSERT_TRUE(audio_content_fid != nullptr) << "Cannot open file, " |
223 FLAGS_audio_content << " to write audio-content.\n"; | 225 << FLAGS_audio_content |
| 226 << " to write audio-content.\n"; |
224 agc_stat.set_audio_content_file(audio_content_fid); | 227 agc_stat.set_audio_content_file(audio_content_fid); |
225 } | 228 } |
226 | 229 |
227 webrtc::AudioFrame frame; | 230 webrtc::AudioFrame frame; |
228 frame.num_channels_ = 1; | 231 frame.num_channels_ = 1; |
229 frame.sample_rate_hz_ = 16000; | 232 frame.sample_rate_hz_ = 16000; |
230 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; | 233 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; |
231 const size_t kSamplesToRead = frame.num_channels_ * | 234 const size_t kSamplesToRead = frame.num_channels_ * |
232 frame.samples_per_channel_; | 235 frame.samples_per_channel_; |
233 | 236 |
(...skipping 16 matching lines...) Expand all Loading... |
250 size_t true_vad_index = 0; | 253 size_t true_vad_index = 0; |
251 bool in_false_positive_region = false; | 254 bool in_false_positive_region = false; |
252 int total_false_positive_duration = 0; | 255 int total_false_positive_duration = 0; |
253 bool video_adapted = false; | 256 bool video_adapted = false; |
254 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 257 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), |
255 kSamplesToRead, pcm_fid)) { | 258 kSamplesToRead, pcm_fid)) { |
256 assert(true_vad_index < kMaxNumFrames); | 259 assert(true_vad_index < kMaxNumFrames); |
257 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 260 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
258 true_vad_fid)) | 261 true_vad_fid)) |
259 << "Size mismatch between True-VAD and the PCM file.\n"; | 262 << "Size mismatch between True-VAD and the PCM file.\n"; |
260 if (video_vad_fid != NULL) { | 263 if (video_vad_fid != nullptr) { |
261 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 264 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
262 "Not enough video-based VAD probabilities."; | 265 "Not enough video-based VAD probabilities."; |
263 } | 266 } |
264 | 267 |
265 // Negative video activity indicates that the video-based VAD is not yet | 268 // Negative video activity indicates that the video-based VAD is not yet |
266 // adapted. Disregards the learning phase in statistics. | 269 // adapted. Disregards the learning phase in statistics. |
267 if (p_video < 0) { | 270 if (p_video < 0) { |
268 if (video_adapted) { | 271 if (video_adapted) { |
269 fprintf(stderr, "Negative video probabilities ONLY allowed at the " | 272 fprintf(stderr, "Negative video probabilities ONLY allowed at the " |
270 "beginning of the sequence, not in the middle.\n"); | 273 "beginning of the sequence, not in the middle.\n"); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
326 } | 329 } |
327 } else { | 330 } else { |
328 ASSERT_TRUE(false) << "Invalid value for true-VAD.\n"; | 331 ASSERT_TRUE(false) << "Invalid value for true-VAD.\n"; |
329 } | 332 } |
330 previous_true_vad = true_vad[n]; | 333 previous_true_vad = true_vad[n]; |
331 } | 334 } |
332 true_vad_index = 0; | 335 true_vad_index = 0; |
333 } | 336 } |
334 } | 337 } |
335 | 338 |
336 if (results_fid != NULL) { | 339 if (results_fid != nullptr) { |
337 fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", | 340 fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
338 total_active, | 341 total_active, |
339 total_missed_detection, | 342 total_missed_detection, |
340 total_passive, | 343 total_passive, |
341 total_false_positive, | 344 total_false_positive, |
342 num_onsets, | 345 num_onsets, |
343 num_not_adapted, | 346 num_not_adapted, |
344 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), | 347 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
345 static_cast<float>(total_false_positive_duration) / | 348 static_cast<float>(total_false_positive_duration) / |
346 (total_passive + 1e-12)); | 349 (total_passive + 1e-12)); |
347 } | 350 } |
348 fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", | 351 fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
349 total_active, | 352 total_active, |
350 total_missed_detection, | 353 total_missed_detection, |
351 total_passive, | 354 total_passive, |
352 total_false_positive, | 355 total_false_positive, |
353 num_onsets, | 356 num_onsets, |
354 num_not_adapted, | 357 num_not_adapted, |
355 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), | 358 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
356 static_cast<float>(total_false_positive_duration) / | 359 static_cast<float>(total_false_positive_duration) / |
357 (total_passive + 1e-12)); | 360 (total_passive + 1e-12)); |
358 | 361 |
359 fclose(true_vad_fid); | 362 fclose(true_vad_fid); |
360 fclose(pcm_fid); | 363 fclose(pcm_fid); |
361 if (video_vad_fid != NULL) { | 364 if (video_vad_fid != nullptr) { |
362 fclose(video_vad_fid); | 365 fclose(video_vad_fid); |
363 } | 366 } |
364 if (results_fid != NULL) { | 367 if (results_fid != nullptr) { |
365 fclose(results_fid); | 368 fclose(results_fid); |
366 } | 369 } |
367 } | 370 } |
368 | 371 |
369 } // namespace webrtc | 372 } // namespace webrtc |
370 | 373 |
371 int main(int argc, char* argv[]) { | 374 int main(int argc, char* argv[]) { |
372 char kUsage[] = | 375 char kUsage[] = |
373 "\nCompute the number of misdetected and false-positive frames. Not\n" | 376 "\nCompute the number of misdetected and false-positive frames. Not\n" |
374 " that for each frame of audio (10 ms) there should be one true\n" | 377 " that for each frame of audio (10 ms) there should be one true\n" |
375 " activity. If any video-based activity is given, there should also be\n" | 378 " activity. If any video-based activity is given, there should also be\n" |
376 " one probability per frame.\n" | 379 " one probability per frame.\n" |
377 "\nUsage:\n\n" | 380 "\nUsage:\n\n" |
378 "activity_metric input_pcm [options]\n" | 381 "activity_metric input_pcm [options]\n" |
379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " | 382 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " |
380 "format.\n\n"; | 383 "format.\n\n"; |
381 google::SetUsageMessage(kUsage); | 384 google::SetUsageMessage(kUsage); |
382 google::ParseCommandLineFlags(&argc, &argv, true); | 385 google::ParseCommandLineFlags(&argc, &argv, true); |
383 webrtc::void_main(argc, argv); | 386 webrtc::void_main(argc, argv); |
384 return 0; | 387 return 0; |
385 } | 388 } |
OLD | NEW |