| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 72 | 72 |
// AgcStat: per-frame activity detector used by this tool. In the rows shown,
// AddAudio() combines audio features with video_vad_ values into 0/1
// decisions written to combined_vad, and can log the audio-content value of
// each frame to a FILE* that the destructor closes. Parts of the class are
// hidden behind the "skipping ... matching lines" rows, so the notes here
// describe only what is visible.
| 73 class AgcStat { | 73 class AgcStat { |
| 74 public: | 74 public: |
// Default construction: video_index_ starts at 0, the activity threshold at
// kDefaultActivityThreshold, no audio-content log file is attached, and every
// video_vad_ slot is preset to 0.5 (presumably a neutral "no video evidence"
// value -- confirm against the elided fusion code).
// NOTE(review): audio_processing_ and vad_ are created with raw new; their
// member declarations are elided, so whether they are smart pointers cannot
// be confirmed from this view.
| 75 AgcStat() | 75 AgcStat() |
| 76 : video_index_(0), | 76 : video_index_(0), |
| 77 activity_threshold_(kDefaultActivityThreshold), | 77 activity_threshold_(kDefaultActivityThreshold), |
| 78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
| 79 audio_processing_(new VadAudioProc()), | 79 audio_processing_(new VadAudioProc()), |
| 80 vad_(new PitchBasedVad()), | 80 vad_(new PitchBasedVad()), |
| 81 standalone_vad_(StandaloneVad::Create()), | 81 standalone_vad_(StandaloneVad::Create()), |
| 82 audio_content_fid_(NULL) { | 82 audio_content_fid_(nullptr) { |
| 83 for (size_t n = 0; n < kMaxNumFrames; n++) | 83 for (size_t n = 0; n < kMaxNumFrames; n++) |
| 84 video_vad_[n] = 0.5; | 84 video_vad_[n] = 0.5; |
| 85 } | 85 } |
| 86 | 86 |
// Closes the audio-content log file if one was attached.
// NOTE(review): the class holds a raw FILE* and closes it here, and no copy
// control is visible in the rows shown; a copied AgcStat would close the same
// handle twice. Confirm whether copying is disabled in the elided rows.
| 87 ~AgcStat() { | 87 ~AgcStat() { |
| 88 if (audio_content_fid_ != NULL) { | 88 if (audio_content_fid_ != nullptr) { |
| 89 fclose(audio_content_fid_); | 89 fclose(audio_content_fid_); |
| 90 } | 90 } |
| 91 } | 91 } |
| 92 | 92 |
// Attaches the file that AddAudio() logs audio-content values to. The
// destructor closes whatever handle is stored here, so the caller must not
// close it as well.
// NOTE(review): a previously stored handle is overwritten without being
// closed, so calling this twice leaks the first file.
| 93 void set_audio_content_file(FILE* audio_content_fid) { | 93 void set_audio_content_file(FILE* audio_content_fid) { |
| 94 audio_content_fid_ = audio_content_fid; | 94 audio_content_fid_ = audio_content_fid; |
| 95 } | 95 } |
| 96 | 96 |
// AddAudio: consumes one audio frame plus a video-activity value and, in the
// visible rows, writes one 0/1 decision per feature frame into combined_vad.
// Returns -1 when vad_->VoicingProbability() reports a negative result; the
// other return paths and the start of the body are elided in this view.
| 97 int AddAudio(const AudioFrame& frame, double p_video, | 97 int AddAudio(const AudioFrame& frame, double p_video, |
| 98 int* combined_vad) { | 98 int* combined_vad) { |
| (...skipping 23 matching lines...) Expand all Loading... |
// p_active and the enclosing loop header are defined in the elided rows.
// p_passive multiplies the complements of p[n] and video_vad_[n]; p[n] is
// then renormalised by (p_active + p_passive) and clamped to [0.01, 0.99].
| 122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); | 122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); |
| 123 p[n] = p_active / (p_active + p_passive); | 123 p[n] = p_active / (p_active + p_passive); |
| 124 // Limit probabilities. | 124 // Limit probabilities. |
| 125 p[n] = std::min(std::max(p[n], 0.01), 0.99); | 125 p[n] = std::min(std::max(p[n], 0.01), 0.99); |
| 126 } | 126 } |
// A negative result from the pitch-based VAD is propagated as -1.
| 127 if (vad_->VoicingProbability(features, p) < 0) | 127 if (vad_->VoicingProbability(features, p) < 0) |
| 128 return -1; | 128 return -1; |
// Per feature frame: feed rms and p[n] into the loudness histogram, read back
// the current audio-content value, optionally append it to the log file as
// the raw bytes of a double, then threshold it into a 0/1 decision.
// NOTE(review): the fwrite() return value is ignored, so a short write to the
// log file goes unnoticed.
| 129 for (size_t n = 0; n < features.num_frames; n++) { | 129 for (size_t n = 0; n < features.num_frames; n++) { |
| 130 audio_content_->Update(features.rms[n], p[n]); | 130 audio_content_->Update(features.rms[n], p[n]); |
| 131 double ac = audio_content_->AudioContent(); | 131 double ac = audio_content_->AudioContent(); |
| 132 if (audio_content_fid_ != NULL) { | 132 if (audio_content_fid_ != nullptr) { |
| 133 fwrite(&ac, sizeof(ac), 1, audio_content_fid_); | 133 fwrite(&ac, sizeof(ac), 1, audio_content_fid_); |
| 134 } | 134 } |
// Active when the audio content exceeds
// kAgcAnalWindowSamples * activity_threshold_.
| 135 if (ac > kAgcAnalWindowSamples * activity_threshold_) { | 135 if (ac > kAgcAnalWindowSamples * activity_threshold_) { |
| 136 combined_vad[n] = 1; | 136 combined_vad[n] = 1; |
| 137 } else { | 137 } else { |
| 138 combined_vad[n] = 0; | 138 combined_vad[n] = 0; |
| 139 } | 139 } |
| 140 } | 140 } |
// video_index_ is reset here; where it is advanced is in the elided rows.
| 141 video_index_ = 0; | 141 video_index_ = 0; |
| 142 } | 142 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 161 std::unique_ptr<StandaloneVad> standalone_vad_; | 161 std::unique_ptr<StandaloneVad> standalone_vad_; |
| 162 | 162 |
// Optional audio-content log file; null when logging is off. Closed by the
// destructor.
| 163 FILE* audio_content_fid_; | 163 FILE* audio_content_fid_; |
| 164 }; | 164 }; |
| 165 | 165 |
| 166 | 166 |
// void_main: body of the tool. In the rows shown it opens the PCM input
// (argv[1]), the ground-truth VAD file (--true_vad), and optionally a results
// file (--result), a video-VAD file (--video_vad) and an audio-content log
// (--audio_content). It then reads the PCM input one frame at a time together
// with one true-VAD value (and optionally one video probability) per frame,
// and finally prints one row of statistics to stdout and, if requested,
// appends it to the results file. The statistics accumulation itself is in
// rows hidden behind the "skipping ... matching lines" markers.
// The function presumably returns void because it uses ASSERT_* macros --
// confirm against the GoogleTest documentation.
// NOTE(review): if the ASSERT_* macros are GoogleTest fatal assertions, a
// failure returns from this function immediately, and files opened earlier
// are not closed on those paths.
| 167 void void_main(int argc, char* argv[]) { | 167 void void_main(int argc, char* argv[]) { |
| 168 webrtc::AgcStat agc_stat; | 168 webrtc::AgcStat agc_stat; |
| 169 | 169 |
// NOTE(review): argv[1] is used here before argc is checked below, and that
// check only prints a message without returning. With no positional argument
// the fopen() call receives argv[1] anyway. The argc test should come first
// and should return.
| 170 FILE* pcm_fid = fopen(argv[1], "rb"); | 170 FILE* pcm_fid = fopen(argv[1], "rb"); |
| 171 ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1]; | 171 ASSERT_TRUE(pcm_fid != nullptr) << "Cannot open PCM file " << argv[1]; |
| 172 | 172 |
| 173 if (argc < 2) { | 173 if (argc < 2) { |
| 174 fprintf(stderr, "\nNot Enough arguments\n"); | 174 fprintf(stderr, "\nNot Enough arguments\n"); |
| 175 } | 175 } |
| 176 | 176 |
// The ground-truth VAD file is mandatory: the flag must be non-empty and the
// file must open.
| 177 FILE* true_vad_fid = NULL; | 177 FILE* true_vad_fid = nullptr; |
| 178 ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true " | 178 ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true " |
| 179 "VADs using --true_vad flag."; | 179 "VADs using --true_vad flag."; |
| 180 true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb"); | 180 true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb"); |
| 181 ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " << | 181 ASSERT_TRUE(true_vad_fid != nullptr) << "Cannot open the active list " |
| 182 FLAGS_true_vad; | 182 << FLAGS_true_vad; |
| 183 | 183 |
// Optional results file: probed with a read-mode open to decide whether a
// header is needed, then reopened in append mode.
// NOTE(review): "writing to this function" in the comment below presumably
// means "writing to this file".
| 184 FILE* results_fid = NULL; | 184 FILE* results_fid = nullptr; |
| 185 if (FLAGS_result.size() > 0) { | 185 if (FLAGS_result.size() > 0) { |
| 186 // True if this is the first time writing to this function and we add a | 186 // True if this is the first time writing to this function and we add a |
| 187 // header to the beginning of the file. | 187 // header to the beginning of the file. |
| 188 bool write_header; | 188 bool write_header; |
| 189 // Open in the read mode. If it fails, the file doesn't exist and has to | 189 // Open in the read mode. If it fails, the file doesn't exist and has to |
| 190 // write a header for it. Otherwise no need to write a header. | 190 // write a header for it. Otherwise no need to write a header. |
| 191 results_fid = fopen(FLAGS_result.c_str(), "r"); | 191 results_fid = fopen(FLAGS_result.c_str(), "r"); |
| 192 if (results_fid == NULL) { | 192 if (results_fid == nullptr) { |
| 193 write_header = true; | 193 write_header = true; |
| 194 } else { | 194 } else { |
| 195 fclose(results_fid); | 195 fclose(results_fid); |
| 196 write_header = false; | 196 write_header = false; |
| 197 } | 197 } |
| 198 // Open in append mode. | 198 // Open in append mode. |
| 199 results_fid = fopen(FLAGS_result.c_str(), "a"); | 199 results_fid = fopen(FLAGS_result.c_str(), "a"); |
| 200 ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " << | 200 ASSERT_TRUE(results_fid != nullptr) << "Cannot open the file, " |
| 201 FLAGS_result << ", to write the results."; | 201 << FLAGS_result |
| 202 << ", to write the results."; |
| 202 // Write the header if required. | 203 // Write the header if required. |
// NOTE(review): this header names seven columns, but the data row printed at
// the end of the function has eight values. The last value
// (total_false_positive_duration / total_passive) has no heading.
| 203 if (write_header) { | 204 if (write_header) { |
| 204 fprintf(results_fid, "%% Total Active, Misdetection, " | 205 fprintf(results_fid, "%% Total Active, Misdetection, " |
| 205 "Total inactive, False Positive, On-sets, Missed segments, " | 206 "Total inactive, False Positive, On-sets, Missed segments, " |
| 206 "Average response\n"); | 207 "Average response\n"); |
| 207 } | 208 } |
| 208 } | 209 } |
| 209 | 210 |
// Optional file of per-frame video-based VAD probabilities.
| 210 FILE* video_vad_fid = NULL; | 211 FILE* video_vad_fid = nullptr; |
| 211 if (FLAGS_video_vad.size() > 0) { | 212 if (FLAGS_video_vad.size() > 0) { |
| 212 video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb"); | 213 video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb"); |
| 213 ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " << | 214 ASSERT_TRUE(video_vad_fid != nullptr) |
| 214 FLAGS_video_vad << " to read video-based VAD decisions.\n"; | 215 << "Cannot open the file, " << FLAGS_video_vad |
| 216 << " to read video-based VAD decisions.\n"; |
| 215 } | 217 } |
| 216 | 218 |
// NOTE(review): "AgsStat" in the comment below is a typo for AgcStat, the
// type of agc_stat. This handle is deliberately absent from the fclose()
// calls at the end of the function because agc_stat closes it.
| 217 // AgsStat will be the owner of this file and will close it at its | 219 // AgsStat will be the owner of this file and will close it at its |
| 218 // destructor. | 220 // destructor. |
| 219 FILE* audio_content_fid = NULL; | 221 FILE* audio_content_fid = nullptr; |
| 220 if (FLAGS_audio_content.size() > 0) { | 222 if (FLAGS_audio_content.size() > 0) { |
| 221 audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb"); | 223 audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb"); |
| 222 ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " << | 224 ASSERT_TRUE(audio_content_fid != nullptr) << "Cannot open file, " |
| 223 FLAGS_audio_content << " to write audio-content.\n"; | 225 << FLAGS_audio_content |
| 226 << " to write audio-content.\n"; |
| 224 agc_stat.set_audio_content_file(audio_content_fid); | 227 agc_stat.set_audio_content_file(audio_content_fid); |
| 225 } | 228 } |
| 226 | 229 |
// Mono, 16 kHz, sample_rate / 100 samples per channel: 160 samples per read,
// i.e. 10 ms of audio per frame.
| 227 webrtc::AudioFrame frame; | 230 webrtc::AudioFrame frame; |
| 228 frame.num_channels_ = 1; | 231 frame.num_channels_ = 1; |
| 229 frame.sample_rate_hz_ = 16000; | 232 frame.sample_rate_hz_ = 16000; |
| 230 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; | 233 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; |
| 231 const size_t kSamplesToRead = frame.num_channels_ * | 234 const size_t kSamplesToRead = frame.num_channels_ * |
| 232 frame.samples_per_channel_; | 235 frame.samples_per_channel_; |
| 233 | 236 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 250 size_t true_vad_index = 0; | 253 size_t true_vad_index = 0; |
| 251 bool in_false_positive_region = false; | 254 bool in_false_positive_region = false; |
| 252 int total_false_positive_duration = 0; | 255 int total_false_positive_duration = 0; |
| 253 bool video_adapted = false; | 256 bool video_adapted = false; |
// Main loop: runs while a full frame of int16 samples can be read, so a
// trailing partial frame ends the loop and is not processed. Each iteration
// must also yield exactly one true-VAD element and, when a video file is
// given, exactly one video probability.
| 254 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 257 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), |
| 255 kSamplesToRead, pcm_fid)) { | 258 kSamplesToRead, pcm_fid)) { |
| 256 assert(true_vad_index < kMaxNumFrames); | 259 assert(true_vad_index < kMaxNumFrames); |
| 257 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 260 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
| 258 true_vad_fid)) | 261 true_vad_fid)) |
| 259 << "Size mismatch between True-VAD and the PCM file.\n"; | 262 << "Size mismatch between True-VAD and the PCM file.\n"; |
| 260 if (video_vad_fid != NULL) { | 263 if (video_vad_fid != nullptr) { |
| 261 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 264 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
| 262 "Not enough video-based VAD probabilities."; | 265 "Not enough video-based VAD probabilities."; |
| 263 } | 266 } |
| 264 | 267 |
| 265 // Negative video activity indicates that the video-based VAD is not yet | 268 // Negative video activity indicates that the video-based VAD is not yet |
| 266 // adapted. Disregards the learning phase in statistics. | 269 // adapted. Disregards the learning phase in statistics. |
| 267 if (p_video < 0) { | 270 if (p_video < 0) { |
| 268 if (video_adapted) { | 271 if (video_adapted) { |
| 269 fprintf(stderr, "Negative video probabilities ONLY allowed at the " | 272 fprintf(stderr, "Negative video probabilities ONLY allowed at the " |
| 270 "beginning of the sequence, not in the middle.\n"); | 273 "beginning of the sequence, not in the middle.\n"); |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 326 } | 329 } |
| 327 } else { | 330 } else { |
| 328 ASSERT_TRUE(false) << "Invalid value for true-VAD.\n"; | 331 ASSERT_TRUE(false) << "Invalid value for true-VAD.\n"; |
| 329 } | 332 } |
| 330 previous_true_vad = true_vad[n]; | 333 previous_true_vad = true_vad[n]; |
| 331 } | 334 } |
| 332 true_vad_index = 0; | 335 true_vad_index = 0; |
| 333 } | 336 } |
| 334 } | 337 } |
| 335 | 338 |
// Emit the summary row: six integer counters followed by two ratios. The
// 1e-12 added to each denominator avoids division by zero when num_onsets or
// total_passive is 0. The same row goes to the results file (if open) and to
// stdout.
| 336 if (results_fid != NULL) { | 339 if (results_fid != nullptr) { |
| 337 fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", | 340 fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
| 338 total_active, | 341 total_active, |
| 339 total_missed_detection, | 342 total_missed_detection, |
| 340 total_passive, | 343 total_passive, |
| 341 total_false_positive, | 344 total_false_positive, |
| 342 num_onsets, | 345 num_onsets, |
| 343 num_not_adapted, | 346 num_not_adapted, |
| 344 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), | 347 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
| 345 static_cast<float>(total_false_positive_duration) / | 348 static_cast<float>(total_false_positive_duration) / |
| 346 (total_passive + 1e-12)); | 349 (total_passive + 1e-12)); |
| 347 } | 350 } |
| 348 fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", | 351 fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
| 349 total_active, | 352 total_active, |
| 350 total_missed_detection, | 353 total_missed_detection, |
| 351 total_passive, | 354 total_passive, |
| 352 total_false_positive, | 355 total_false_positive, |
| 353 num_onsets, | 356 num_onsets, |
| 354 num_not_adapted, | 357 num_not_adapted, |
| 355 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), | 358 static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
| 356 static_cast<float>(total_false_positive_duration) / | 359 static_cast<float>(total_false_positive_duration) / |
| 357 (total_passive + 1e-12)); | 360 (total_passive + 1e-12)); |
| 358 | 361 |
// Close every file this function owns. The audio-content log is not closed
// here; agc_stat closes it in its destructor.
| 359 fclose(true_vad_fid); | 362 fclose(true_vad_fid); |
| 360 fclose(pcm_fid); | 363 fclose(pcm_fid); |
| 361 if (video_vad_fid != NULL) { | 364 if (video_vad_fid != nullptr) { |
| 362 fclose(video_vad_fid); | 365 fclose(video_vad_fid); |
| 363 } | 366 } |
| 364 if (results_fid != NULL) { | 367 if (results_fid != nullptr) { |
| 365 fclose(results_fid); | 368 fclose(results_fid); |
| 366 } | 369 } |
| 367 } | 370 } |
| 368 | 371 |
| 369 } // namespace webrtc | 372 } // namespace webrtc |
| 370 | 373 |
// Program entry point: registers the usage text, parses the command-line
// flags, and runs webrtc::void_main() on the resulting argc/argv.
// NOTE(review): the exit status is always 0. void_main() returns void, so a
// failed check inside it is not reflected in the process exit code.
// NOTE(review): "frames. Not\n that" in the usage text reads like a typo for
// "Note that". kUsage is never modified here and could be declared const.
| 371 int main(int argc, char* argv[]) { | 374 int main(int argc, char* argv[]) { |
| 372 char kUsage[] = | 375 char kUsage[] = |
| 373 "\nCompute the number of misdetected and false-positive frames. Not\n" | 376 "\nCompute the number of misdetected and false-positive frames. Not\n" |
| 374 " that for each frame of audio (10 ms) there should be one true\n" | 377 " that for each frame of audio (10 ms) there should be one true\n" |
| 375 " activity. If any video-based activity is given, there should also be\n" | 378 " activity. If any video-based activity is given, there should also be\n" |
| 376 " one probability per frame.\n" | 379 " one probability per frame.\n" |
| 377 "\nUsage:\n\n" | 380 "\nUsage:\n\n" |
| 378 "activity_metric input_pcm [options]\n" | 381 "activity_metric input_pcm [options]\n" |
| 379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " | 382 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " |
| 380 "format.\n\n"; | 383 "format.\n\n"; |
| 381 google::SetUsageMessage(kUsage); | 384 google::SetUsageMessage(kUsage); |
// The third argument (true) is presumably the gflags remove_flags option,
// which would leave only positional arguments in argv so that argv[1] is the
// PCM path -- confirm against the gflags documentation.
| 382 google::ParseCommandLineFlags(&argc, &argv, true); | 385 google::ParseCommandLineFlags(&argc, &argv, true); |
| 383 webrtc::void_main(argc, argv); | 386 webrtc::void_main(argc, argv); |
| 384 return 0; | 387 return 0; |
| 385 } | 388 } |
| OLD | NEW |