Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(302)

Side by Side Diff: webrtc/tools/agc/activity_metric.cc

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master
Patch Set: Resync Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « webrtc/test/fake_audio_device.cc ('k') | webrtc/tools/agc/agc_manager.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
54 54
55 // Add some dither to quiet frames. This keeps ExtractFeatures from skipping a 55 // Add some dither to quiet frames. This keeps ExtractFeatures from skipping a
56 // silent frame. Otherwise true VAD would drift with respect to the audio. 56 // silent frame. Otherwise true VAD would drift with respect to the audio.
57 // We only consider mono inputs. 57 // We only consider mono inputs.
58 static void DitherSilence(AudioFrame* frame) { 58 static void DitherSilence(AudioFrame* frame) {
59 ASSERT_EQ(1, frame->num_channels_); 59 ASSERT_EQ(1, frame->num_channels_);
60 const double kRmsSilence = 5; 60 const double kRmsSilence = 5;
61 const double sum_squared_silence = kRmsSilence * kRmsSilence * 61 const double sum_squared_silence = kRmsSilence * kRmsSilence *
62 frame->samples_per_channel_; 62 frame->samples_per_channel_;
63 double sum_squared = 0; 63 double sum_squared = 0;
64 for (int n = 0; n < frame->samples_per_channel_; n++) 64 for (size_t n = 0; n < frame->samples_per_channel_; n++)
65 sum_squared += frame->data_[n] * frame->data_[n]; 65 sum_squared += frame->data_[n] * frame->data_[n];
66 if (sum_squared <= sum_squared_silence) { 66 if (sum_squared <= sum_squared_silence) {
67 for (int n = 0; n < frame->samples_per_channel_; n++) 67 for (size_t n = 0; n < frame->samples_per_channel_; n++)
68 frame->data_[n] = (rand() & 0xF) - 8; 68 frame->data_[n] = (rand() & 0xF) - 8;
69 } 69 }
70 } 70 }
71 71
72 class AgcStat { 72 class AgcStat {
73 public: 73 public:
74 AgcStat() 74 AgcStat()
75 : video_index_(0), 75 : video_index_(0),
76 activity_threshold_(kDefaultActivityThreshold), 76 activity_threshold_(kDefaultActivityThreshold),
77 audio_content_(Histogram::Create(kAgcAnalWindowSamples)), 77 audio_content_(Histogram::Create(kAgcAnalWindowSamples)),
78 audio_processing_(new VadAudioProc()), 78 audio_processing_(new VadAudioProc()),
79 vad_(new PitchBasedVad()), 79 vad_(new PitchBasedVad()),
80 standalone_vad_(StandaloneVad::Create()), 80 standalone_vad_(StandaloneVad::Create()),
81 audio_content_fid_(NULL) { 81 audio_content_fid_(NULL) {
82 for (int n = 0; n < kMaxNumFrames; n++) 82 for (size_t n = 0; n < kMaxNumFrames; n++)
83 video_vad_[n] = 0.5; 83 video_vad_[n] = 0.5;
84 } 84 }
85 85
86 ~AgcStat() { 86 ~AgcStat() {
87 if (audio_content_fid_ != NULL) { 87 if (audio_content_fid_ != NULL) {
88 fclose(audio_content_fid_); 88 fclose(audio_content_fid_);
89 } 89 }
90 } 90 }
91 91
92 void set_audio_content_file(FILE* audio_content_fid) { 92 void set_audio_content_file(FILE* audio_content_fid) {
(...skipping 16 matching lines...) Expand all
109 frame.samples_per_channel_); 109 frame.samples_per_channel_);
110 } 110 }
111 if (features.num_frames > 0) { 111 if (features.num_frames > 0) {
112 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; 112 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};
113 if (FLAGS_standalone_vad) { 113 if (FLAGS_standalone_vad) {
114 standalone_vad_->GetActivity(p, kMaxNumFrames); 114 standalone_vad_->GetActivity(p, kMaxNumFrames);
115 } 115 }
116 // TODO(turajs): Combining and limiting are also used in the source files; 116 // TODO(turajs): Combining and limiting are also used in the source files;
117 // they can be moved to a utility. 117 // they can be moved to a utility.
118 // Combine Video and stand-alone VAD. 118 // Combine Video and stand-alone VAD.
119 for (int n = 0; n < features.num_frames; n++) { 119 for (size_t n = 0; n < features.num_frames; n++) {
120 double p_active = p[n] * video_vad_[n]; 120 double p_active = p[n] * video_vad_[n];
121 double p_passive = (1 - p[n]) * (1 - video_vad_[n]); 121 double p_passive = (1 - p[n]) * (1 - video_vad_[n]);
122 p[n] = p_active / (p_active + p_passive); 122 p[n] = p_active / (p_active + p_passive);
123 // Limit probabilities. 123 // Limit probabilities.
124 p[n] = std::min(std::max(p[n], 0.01), 0.99); 124 p[n] = std::min(std::max(p[n], 0.01), 0.99);
125 } 125 }
126 if (vad_->VoicingProbability(features, p) < 0) 126 if (vad_->VoicingProbability(features, p) < 0)
127 return -1; 127 return -1;
128 for (int n = 0; n < features.num_frames; n++) { 128 for (size_t n = 0; n < features.num_frames; n++) {
129 audio_content_->Update(features.rms[n], p[n]); 129 audio_content_->Update(features.rms[n], p[n]);
130 double ac = audio_content_->AudioContent(); 130 double ac = audio_content_->AudioContent();
131 if (audio_content_fid_ != NULL) { 131 if (audio_content_fid_ != NULL) {
132 fwrite(&ac, sizeof(ac), 1, audio_content_fid_); 132 fwrite(&ac, sizeof(ac), 1, audio_content_fid_);
133 } 133 }
134 if (ac > kAgcAnalWindowSamples * activity_threshold_) { 134 if (ac > kAgcAnalWindowSamples * activity_threshold_) {
135 combined_vad[n] = 1; 135 combined_vad[n] = 1;
136 } else { 136 } else {
137 combined_vad[n] = 0; 137 combined_vad[n] = 0;
138 } 138 }
139 } 139 }
140 video_index_ = 0; 140 video_index_ = 0;
141 } 141 }
142 return features.num_frames; 142 return static_cast<int>(features.num_frames);
143 } 143 }
144 144
145 void Reset() { 145 void Reset() {
146 audio_content_->Reset(); 146 audio_content_->Reset();
147 } 147 }
148 148
149 void SetActivityThreshold(double activity_threshold) { 149 void SetActivityThreshold(double activity_threshold) {
150 activity_threshold_ = activity_threshold; 150 activity_threshold_ = activity_threshold;
151 } 151 }
152 152
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
239 double p_video = 0.5; 239 double p_video = 0.5;
240 int total_active = 0; 240 int total_active = 0;
241 int total_passive = 0; 241 int total_passive = 0;
242 int total_false_positive = 0; 242 int total_false_positive = 0;
243 int total_missed_detection = 0; 243 int total_missed_detection = 0;
244 int onset_adaptation = 0; 244 int onset_adaptation = 0;
245 int num_onsets = 0; 245 int num_onsets = 0;
246 bool onset = false; 246 bool onset = false;
247 uint8_t previous_true_vad = 0; 247 uint8_t previous_true_vad = 0;
248 int num_not_adapted = 0; 248 int num_not_adapted = 0;
249 int true_vad_index = 0; 249 size_t true_vad_index = 0;
250 bool in_false_positive_region = false; 250 bool in_false_positive_region = false;
251 int total_false_positive_duration = 0; 251 int total_false_positive_duration = 0;
252 bool video_adapted = false; 252 bool video_adapted = false;
253 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), 253 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),
254 kSamplesToRead, pcm_fid)) { 254 kSamplesToRead, pcm_fid)) {
255 assert(true_vad_index < kMaxNumFrames); 255 assert(true_vad_index < kMaxNumFrames);
256 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, 256 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,
257 true_vad_fid)) 257 true_vad_fid))
258 << "Size mismatch between True-VAD and the PCM file.\n"; 258 << "Size mismatch between True-VAD and the PCM file.\n";
259 if (video_vad_fid != NULL) { 259 if (video_vad_fid != NULL) {
(...skipping 25 matching lines...) Expand all
285 agc_stat.Reset(); 285 agc_stat.Reset();
286 } 286 }
287 true_vad_index++; 287 true_vad_index++;
288 288
289 DitherSilence(&frame); 289 DitherSilence(&frame);
290 290
291 ret_val = agc_stat.AddAudio(frame, p_video, agc_vad); 291 ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);
292 ASSERT_GE(ret_val, 0); 292 ASSERT_GE(ret_val, 0);
293 293
294 if (ret_val > 0) { 294 if (ret_val > 0) {
295 ASSERT_EQ(true_vad_index, ret_val); 295 ASSERT_EQ(true_vad_index, static_cast<size_t>(ret_val));
296 for (int n = 0; n < ret_val; n++) { 296 for (int n = 0; n < ret_val; n++) {
297 if (true_vad[n] == 1) { 297 if (true_vad[n] == 1) {
298 total_active++; 298 total_active++;
299 if (previous_true_vad == 0) { 299 if (previous_true_vad == 0) {
300 num_onsets++; 300 num_onsets++;
301 onset = true; 301 onset = true;
302 } 302 }
303 if (agc_vad[n] == 0) { 303 if (agc_vad[n] == 0) {
304 total_missed_detection++; 304 total_missed_detection++;
305 if (onset) 305 if (onset)
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
375 " one probability per frame.\n" 375 " one probability per frame.\n"
376 "\nUsage:\n\n" 376 "\nUsage:\n\n"
377 "activity_metric input_pcm [options]\n" 377 "activity_metric input_pcm [options]\n"
378 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " 378 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "
379 "format.\n\n"; 379 "format.\n\n";
380 google::SetUsageMessage(kUsage); 380 google::SetUsageMessage(kUsage);
381 google::ParseCommandLineFlags(&argc, &argv, true); 381 google::ParseCommandLineFlags(&argc, &argv, true);
382 webrtc::void_main(argc, argv); 382 webrtc::void_main(argc, argv);
383 return 0; 383 return 0;
384 } 384 }
OLDNEW
« no previous file with comments | « webrtc/test/fake_audio_device.cc ('k') | webrtc/tools/agc/agc_manager.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698