Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: webrtc/tools/agc/activity_metric.cc

Issue 2874403003: Remove gflags dependency for event_log_visualizer and activity_metric (Closed)
Patch Set: Fix float compile error on Win Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/tools/BUILD.gn ('k') | webrtc/tools/event_log_visualizer/main.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 11
12 #include <math.h> 12 #include <math.h>
13 #include <stdio.h> 13 #include <stdio.h>
14 #include <stdlib.h> 14 #include <stdlib.h>
15 15
16 #include <algorithm> 16 #include <algorithm>
17 #include <memory> 17 #include <memory>
18 18
19 #include "gflags/gflags.h" 19 #include "webrtc/base/flags.h"
20 #include "webrtc/modules/audio_processing/agc/agc.h" 20 #include "webrtc/modules/audio_processing/agc/agc.h"
21 #include "webrtc/modules/audio_processing/agc/loudness_histogram.h" 21 #include "webrtc/modules/audio_processing/agc/loudness_histogram.h"
22 #include "webrtc/modules/audio_processing/agc/utility.h" 22 #include "webrtc/modules/audio_processing/agc/utility.h"
23 #include "webrtc/modules/audio_processing/vad/common.h" 23 #include "webrtc/modules/audio_processing/vad/common.h"
24 #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" 24 #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
25 #include "webrtc/modules/audio_processing/vad/standalone_vad.h" 25 #include "webrtc/modules/audio_processing/vad/standalone_vad.h"
26 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" 26 #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
27 #include "webrtc/modules/include/module_common_types.h" 27 #include "webrtc/modules/include/module_common_types.h"
28 #include "webrtc/test/gtest.h" 28 #include "webrtc/test/gtest.h"
29 29
30 static const int kAgcAnalWindowSamples = 100; 30 static const int kAgcAnalWindowSamples = 100;
31 static const double kDefaultActivityThreshold = 0.3; 31 static const float kDefaultActivityThreshold = 0.3f;
32 32
33 DEFINE_bool(standalone_vad, true, "enable stand-alone VAD"); 33 DEFINE_bool(standalone_vad, true, "enable stand-alone VAD");
34 DEFINE_string(true_vad, "", "name of a file containing true VAD in 'int'" 34 DEFINE_string(true_vad, "", "name of a file containing true VAD in 'int'"
35 " format"); 35 " format");
36 DEFINE_string(video_vad, "", "name of a file containing video VAD (activity" 36 DEFINE_string(video_vad, "", "name of a file containing video VAD (activity"
37 " probabilities) in double format. One activity per 10ms is" 37 " probabilities) in double format. One activity per 10ms is"
38 " required. If no file is given the video information is not" 38 " required. If no file is given the video information is not"
39 " incorporated. Negative activity is interpreted as video is" 39 " incorporated. Negative activity is interpreted as video is"
40 " not adapted and the statistics are not computed during" 40 " not adapted and the statistics are not computed during"
41 " the learning phase. Note that the negative video activities" 41 " the learning phase. Note that the negative video activities"
42 " are ONLY allowed at the beginning."); 42 " are ONLY allowed at the beginning.");
43 DEFINE_string(result, "", "name of a file to write the results. The results" 43 DEFINE_string(result, "", "name of a file to write the results. The results"
44 " will be appended to the end of the file. This is optional."); 44 " will be appended to the end of the file. This is optional.");
45 DEFINE_string(audio_content, "", "name of a file where audio content is written" 45 DEFINE_string(audio_content, "", "name of a file where audio content is written"
46 " to, in double format."); 46 " to, in double format.");
47 DEFINE_double(activity_threshold, kDefaultActivityThreshold, 47 DEFINE_float(activity_threshold, kDefaultActivityThreshold,
48 "Activity threshold"); 48 "Activity threshold");
49 49
50 namespace webrtc { 50 namespace webrtc {
51 51
52 // TODO(turajs) A new CL will be committed soon where ExtractFeatures will 52 // TODO(turajs) A new CL will be committed soon where ExtractFeatures will
53 // notify the caller of "silence" input, instead of bailing out. We would not 53 // notify the caller of "silence" input, instead of bailing out. We would not
54 // need the following function when such a change is made. 54 // need the following function when such a change is made.
55 55
56 // Add some dither to quiet frames. This prevents ExtractFeatures from skipping a 56 // Add some dither to quiet frames. This prevents ExtractFeatures from skipping a
57 // silent frame. Otherwise true VAD would drift with respect to the audio. 57 // silent frame. Otherwise true VAD would drift with respect to the audio.
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
98 int* combined_vad) { 98 int* combined_vad) {
99 if (frame.num_channels_ != 1 || 99 if (frame.num_channels_ != 1 ||
100 frame.samples_per_channel_ != 100 frame.samples_per_channel_ !=
101 kSampleRateHz / 100 || 101 kSampleRateHz / 100 ||
102 frame.sample_rate_hz_ != kSampleRateHz) 102 frame.sample_rate_hz_ != kSampleRateHz)
103 return -1; 103 return -1;
104 video_vad_[video_index_++] = p_video; 104 video_vad_[video_index_++] = p_video;
105 AudioFeatures features; 105 AudioFeatures features;
106 audio_processing_->ExtractFeatures( 106 audio_processing_->ExtractFeatures(
107 frame.data_, frame.samples_per_channel_, &features); 107 frame.data_, frame.samples_per_channel_, &features);
108 if (FLAGS_standalone_vad) { 108 if (FLAG_standalone_vad) {
109 standalone_vad_->AddAudio(frame.data_, 109 standalone_vad_->AddAudio(frame.data_,
110 frame.samples_per_channel_); 110 frame.samples_per_channel_);
111 } 111 }
112 if (features.num_frames > 0) { 112 if (features.num_frames > 0) {
113 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; 113 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};
114 if (FLAGS_standalone_vad) { 114 if (FLAG_standalone_vad) {
115 standalone_vad_->GetActivity(p, kMaxNumFrames); 115 standalone_vad_->GetActivity(p, kMaxNumFrames);
116 } 116 }
117 // TODO(turajs) combining and limiting are used in the source files as 117 // TODO(turajs) combining and limiting are used in the source files as
118 // well they can be moved to utility. 118 // well they can be moved to utility.
119 // Combine Video and stand-alone VAD. 119 // Combine Video and stand-alone VAD.
120 for (size_t n = 0; n < features.num_frames; n++) { 120 for (size_t n = 0; n < features.num_frames; n++) {
121 double p_active = p[n] * video_vad_[n]; 121 double p_active = p[n] * video_vad_[n];
122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); 122 double p_passive = (1 - p[n]) * (1 - video_vad_[n]);
123 p[n] = p_active / (p_active + p_passive); 123 p[n] = p_active / (p_active + p_passive);
124 // Limit probabilities. 124 // Limit probabilities.
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 webrtc::AgcStat agc_stat; 168 webrtc::AgcStat agc_stat;
169 169
170 FILE* pcm_fid = fopen(argv[1], "rb"); 170 FILE* pcm_fid = fopen(argv[1], "rb");
171 ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1]; 171 ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1];
172 172
173 if (argc < 2) { 173 if (argc < 2) {
174 fprintf(stderr, "\nNot Enough arguments\n"); 174 fprintf(stderr, "\nNot Enough arguments\n");
175 } 175 }
176 176
177 FILE* true_vad_fid = NULL; 177 FILE* true_vad_fid = NULL;
178 ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true " 178 ASSERT_GT(strlen(FLAG_true_vad), 0u) << "Specify the file containing true "
179 "VADs using --true_vad flag."; 179 "VADs using --true_vad flag.";
180 true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb"); 180 true_vad_fid = fopen(FLAG_true_vad, "rb");
181 ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " << 181 ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " <<
182 FLAGS_true_vad; 182 FLAG_true_vad;
183 183
184 FILE* results_fid = NULL; 184 FILE* results_fid = NULL;
185 if (FLAGS_result.size() > 0) { 185 if (strlen(FLAG_result) > 0) {
186 // True if this is the first time writing to this file, in which case we add a 186 // True if this is the first time writing to this file, in which case we add a
187 // header to the beginning of the file. 187 // header to the beginning of the file.
188 bool write_header; 188 bool write_header;
189 // Open in read mode. If that fails, the file doesn't exist and we have to 189 // Open in read mode. If that fails, the file doesn't exist and we have to
190 // write a header for it. Otherwise there is no need to write a header. 190 // write a header for it. Otherwise there is no need to write a header.
191 results_fid = fopen(FLAGS_result.c_str(), "r"); 191 results_fid = fopen(FLAG_result, "r");
192 if (results_fid == NULL) { 192 if (results_fid == NULL) {
193 write_header = true; 193 write_header = true;
194 } else { 194 } else {
195 fclose(results_fid); 195 fclose(results_fid);
196 write_header = false; 196 write_header = false;
197 } 197 }
198 // Open in append mode. 198 // Open in append mode.
199 results_fid = fopen(FLAGS_result.c_str(), "a"); 199 results_fid = fopen(FLAG_result, "a");
200 ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " << 200 ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " <<
201 FLAGS_result << ", to write the results."; 201 FLAG_result << ", to write the results.";
202 // Write the header if required. 202 // Write the header if required.
203 if (write_header) { 203 if (write_header) {
204 fprintf(results_fid, "%% Total Active, Misdetection, " 204 fprintf(results_fid, "%% Total Active, Misdetection, "
205 "Total inactive, False Positive, On-sets, Missed segments, " 205 "Total inactive, False Positive, On-sets, Missed segments, "
206 "Average response\n"); 206 "Average response\n");
207 } 207 }
208 } 208 }
209 209
210 FILE* video_vad_fid = NULL; 210 FILE* video_vad_fid = NULL;
211 if (FLAGS_video_vad.size() > 0) { 211 if (strlen(FLAG_video_vad) > 0) {
212 video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb"); 212 video_vad_fid = fopen(FLAG_video_vad, "rb");
213 ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " << 213 ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " <<
214 FLAGS_video_vad << " to read video-based VAD decisions.\n"; 214 FLAG_video_vad << " to read video-based VAD decisions.\n";
215 } 215 }
216 216
217 // AgcStat will be the owner of this file and will close it in its 217 // AgcStat will be the owner of this file and will close it in its
218 // destructor. 218 // destructor.
219 FILE* audio_content_fid = NULL; 219 FILE* audio_content_fid = NULL;
220 if (FLAGS_audio_content.size() > 0) { 220 if (strlen(FLAG_audio_content) > 0) {
221 audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb"); 221 audio_content_fid = fopen(FLAG_audio_content, "wb");
222 ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " << 222 ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " <<
223 FLAGS_audio_content << " to write audio-content.\n"; 223 FLAG_audio_content << " to write audio-content.\n";
224 agc_stat.set_audio_content_file(audio_content_fid); 224 agc_stat.set_audio_content_file(audio_content_fid);
225 } 225 }
226 226
227 webrtc::AudioFrame frame; 227 webrtc::AudioFrame frame;
228 frame.num_channels_ = 1; 228 frame.num_channels_ = 1;
229 frame.sample_rate_hz_ = 16000; 229 frame.sample_rate_hz_ = 16000;
230 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; 230 frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
231 const size_t kSamplesToRead = frame.num_channels_ * 231 const size_t kSamplesToRead = frame.num_channels_ *
232 frame.samples_per_channel_; 232 frame.samples_per_channel_;
233 233
234 agc_stat.SetActivityThreshold(FLAGS_activity_threshold); 234 agc_stat.SetActivityThreshold(FLAG_activity_threshold);
235 235
236 int ret_val = 0; 236 int ret_val = 0;
237 int num_frames = 0; 237 int num_frames = 0;
238 int agc_vad[kMaxNumFrames]; 238 int agc_vad[kMaxNumFrames];
239 uint8_t true_vad[kMaxNumFrames]; 239 uint8_t true_vad[kMaxNumFrames];
240 double p_video = 0.5; 240 double p_video = 0.5;
241 int total_active = 0; 241 int total_active = 0;
242 int total_passive = 0; 242 int total_passive = 0;
243 int total_false_positive = 0; 243 int total_false_positive = 0;
244 int total_missed_detection = 0; 244 int total_missed_detection = 0;
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 fclose(video_vad_fid); 362 fclose(video_vad_fid);
363 } 363 }
364 if (results_fid != NULL) { 364 if (results_fid != NULL) {
365 fclose(results_fid); 365 fclose(results_fid);
366 } 366 }
367 } 367 }
368 368
369 } // namespace webrtc 369 } // namespace webrtc
370 370
371 int main(int argc, char* argv[]) { 371 int main(int argc, char* argv[]) {
372 char kUsage[] = 372 std::string usage =
kwiberg-webrtc 2017/05/16 04:10:09 Wouldn't it make more sense to change this to a co
kjellander_webrtc 2017/05/16 05:44:17 Right, I changed that now.
373 "\nCompute the number of misdetected and false-positive frames. Not\n" 373 "\nCompute the number of misdetected and false-positive frames. Not\n"
374 " that for each frame of audio (10 ms) there should be one true\n" 374 " that for each frame of audio (10 ms) there should be one true\n"
375 " activity. If any video-based activity is given, there should also be\n" 375 " activity. If any video-based activity is given, there should also be\n"
376 " one probability per frame.\n" 376 " one probability per frame.\n"
377 "\nUsage:\n\n" 377 "\nUsage:\n\n"
378 "activity_metric input_pcm [options]\n" 378 "activity_metric input_pcm [options]\n"
379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " 379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "
380 "format.\n\n"; 380 "format.\n\n";
381 google::SetUsageMessage(kUsage); 381 if (argc == 1) {
382 google::ParseCommandLineFlags(&argc, &argv, true); 382 // Print usage information.
383 std::cout << usage;
384 return 0;
385 }
386 rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true);
383 webrtc::void_main(argc, argv); 387 webrtc::void_main(argc, argv);
384 return 0; 388 return 0;
385 } 389 }
OLDNEW
« no previous file with comments | « webrtc/tools/BUILD.gn ('k') | webrtc/tools/event_log_visualizer/main.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698