webrtc/tools/agc/activity_metric.cc - Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce use of int16_t/uint16_t.

Side by Side Diff: webrtc/tools/agc/activity_metric.cc

Issue 1230503003: Update a ton of audio code to use size_t more correctly and in general reduce use of int16_t/uint16_t. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc@master

Patch Set: Resync Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
54	54

55 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a	55 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a

56 // silence frame. Otherwise true VAD would drift with respect to the audio.	56 // silence frame. Otherwise true VAD would drift with respect to the audio.

57 // We only consider mono inputs.	57 // We only consider mono inputs.

58 static void DitherSilence(AudioFrame* frame) {	58 static void DitherSilence(AudioFrame* frame) {

59 ASSERT_EQ(1, frame->num_channels_);	59 ASSERT_EQ(1, frame->num_channels_);

60 const double kRmsSilence = 5;	60 const double kRmsSilence = 5;

61 const double sum_squared_silence = kRmsSilence * kRmsSilence *	61 const double sum_squared_silence = kRmsSilence * kRmsSilence *

62 frame->samples_per_channel_;	62 frame->samples_per_channel_;

63 double sum_squared = 0;	63 double sum_squared = 0;

64 for (int n = 0; n < frame->samples_per_channel_; n++)	64 for (size_t n = 0; n < frame->samples_per_channel_; n++)

65 sum_squared += frame->data_[n] * frame->data_[n];	65 sum_squared += frame->data_[n] * frame->data_[n];

66 if (sum_squared <= sum_squared_silence) {	66 if (sum_squared <= sum_squared_silence) {

67 for (int n = 0; n < frame->samples_per_channel_; n++)	67 for (size_t n = 0; n < frame->samples_per_channel_; n++)

68 frame->data_[n] = (rand() & 0xF) - 8;	68 frame->data_[n] = (rand() & 0xF) - 8;

69 }	69 }

70 }	70 }

71	71

72 class AgcStat {	72 class AgcStat {

73 public:	73 public:

74 AgcStat()	74 AgcStat()

75 : video_index_(0),	75 : video_index_(0),

76 activity_threshold_(kDefaultActivityThreshold),	76 activity_threshold_(kDefaultActivityThreshold),

77 audio_content_(Histogram::Create(kAgcAnalWindowSamples)),	77 audio_content_(Histogram::Create(kAgcAnalWindowSamples)),

78 audio_processing_(new VadAudioProc()),	78 audio_processing_(new VadAudioProc()),

79 vad_(new PitchBasedVad()),	79 vad_(new PitchBasedVad()),

80 standalone_vad_(StandaloneVad::Create()),	80 standalone_vad_(StandaloneVad::Create()),

81 audio_content_fid_(NULL) {	81 audio_content_fid_(NULL) {

82 for (int n = 0; n < kMaxNumFrames; n++)	82 for (size_t n = 0; n < kMaxNumFrames; n++)

83 video_vad_[n] = 0.5;	83 video_vad_[n] = 0.5;

84 }	84 }

85	85

86 ~AgcStat() {	86 ~AgcStat() {

87 if (audio_content_fid_ != NULL) {	87 if (audio_content_fid_ != NULL) {

88 fclose(audio_content_fid_);	88 fclose(audio_content_fid_);

89 }	89 }

90 }	90 }

91	91

92 void set_audio_content_file(FILE* audio_content_fid) {	92 void set_audio_content_file(FILE* audio_content_fid) {

(...skipping 16 matching lines...) Expand all Loading...
109 frame.samples_per_channel_);	109 frame.samples_per_channel_);

110 }	110 }

111 if (features.num_frames > 0) {	111 if (features.num_frames > 0) {

112 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};	112 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};

113 if (FLAGS_standalone_vad) {	113 if (FLAGS_standalone_vad) {

114 standalone_vad_->GetActivity(p, kMaxNumFrames);	114 standalone_vad_->GetActivity(p, kMaxNumFrames);

115 }	115 }

116 // TODO(turajs) combining and limiting are used in the source files as	116 // TODO(turajs) combining and limiting are used in the source files as

117 // well they can be moved to utility.	117 // well they can be moved to utility.

118 // Combine Video and stand-alone VAD.	118 // Combine Video and stand-alone VAD.

119 for (int n = 0; n < features.num_frames; n++) {	119 for (size_t n = 0; n < features.num_frames; n++) {

120 double p_active = p[n] * video_vad_[n];	120 double p_active = p[n] * video_vad_[n];

121 double p_passive = (1 - p[n]) * (1 - video_vad_[n]);	121 double p_passive = (1 - p[n]) * (1 - video_vad_[n]);

122 p[n] = p_active / (p_active + p_passive);	122 p[n] = p_active / (p_active + p_passive);

123 // Limit probabilities.	123 // Limit probabilities.

124 p[n] = std::min(std::max(p[n], 0.01), 0.99);	124 p[n] = std::min(std::max(p[n], 0.01), 0.99);

125 }	125 }

126 if (vad_->VoicingProbability(features, p) < 0)	126 if (vad_->VoicingProbability(features, p) < 0)

127 return -1;	127 return -1;

128 for (int n = 0; n < features.num_frames; n++) {	128 for (size_t n = 0; n < features.num_frames; n++) {

129 audio_content_->Update(features.rms[n], p[n]);	129 audio_content_->Update(features.rms[n], p[n]);

130 double ac = audio_content_->AudioContent();	130 double ac = audio_content_->AudioContent();

131 if (audio_content_fid_ != NULL) {	131 if (audio_content_fid_ != NULL) {

132 fwrite(&ac, sizeof(ac), 1, audio_content_fid_);	132 fwrite(&ac, sizeof(ac), 1, audio_content_fid_);

133 }	133 }

134 if (ac > kAgcAnalWindowSamples * activity_threshold_) {	134 if (ac > kAgcAnalWindowSamples * activity_threshold_) {

135 combined_vad[n] = 1;	135 combined_vad[n] = 1;

136 } else {	136 } else {

137 combined_vad[n] = 0;	137 combined_vad[n] = 0;

138 }	138 }

139 }	139 }

140 video_index_ = 0;	140 video_index_ = 0;

141 }	141 }

142 return features.num_frames;	142 return static_cast<int>(features.num_frames);

143 }	143 }

144	144

145 void Reset() {	145 void Reset() {

146 audio_content_->Reset();	146 audio_content_->Reset();

147 }	147 }

148	148

149 void SetActivityThreshold(double activity_threshold) {	149 void SetActivityThreshold(double activity_threshold) {

150 activity_threshold_ = activity_threshold;	150 activity_threshold_ = activity_threshold;

151 }	151 }

152	152

(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
239 double p_video = 0.5;	239 double p_video = 0.5;

240 int total_active = 0;	240 int total_active = 0;

241 int total_passive = 0;	241 int total_passive = 0;

242 int total_false_positive = 0;	242 int total_false_positive = 0;

243 int total_missed_detection = 0;	243 int total_missed_detection = 0;

244 int onset_adaptation = 0;	244 int onset_adaptation = 0;

245 int num_onsets = 0;	245 int num_onsets = 0;

246 bool onset = false;	246 bool onset = false;

247 uint8_t previous_true_vad = 0;	247 uint8_t previous_true_vad = 0;

248 int num_not_adapted = 0;	248 int num_not_adapted = 0;

249 int true_vad_index = 0;	249 size_t true_vad_index = 0;

250 bool in_false_positive_region = false;	250 bool in_false_positive_region = false;

251 int total_false_positive_duration = 0;	251 int total_false_positive_duration = 0;

252 bool video_adapted = false;	252 bool video_adapted = false;

253 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),	253 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),

254 kSamplesToRead, pcm_fid)) {	254 kSamplesToRead, pcm_fid)) {

255 assert(true_vad_index < kMaxNumFrames);	255 assert(true_vad_index < kMaxNumFrames);

256 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,	256 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,

257 true_vad_fid))	257 true_vad_fid))

258 << "Size mismatch between True-VAD and the PCM file.\n";	258 << "Size mismatch between True-VAD and the PCM file.\n";

259 if (video_vad_fid != NULL) {	259 if (video_vad_fid != NULL) {

(...skipping 25 matching lines...) Expand all Loading...
285 agc_stat.Reset();	285 agc_stat.Reset();

286 }	286 }

287 true_vad_index++;	287 true_vad_index++;

288	288

289 DitherSilence(&frame);	289 DitherSilence(&frame);

290	290

291 ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);	291 ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);

292 ASSERT_GE(ret_val, 0);	292 ASSERT_GE(ret_val, 0);

293	293

294 if (ret_val > 0) {	294 if (ret_val > 0) {

295 ASSERT_EQ(true_vad_index, ret_val);	295 ASSERT_EQ(true_vad_index, static_cast<size_t>(ret_val));

296 for (int n = 0; n < ret_val; n++) {	296 for (int n = 0; n < ret_val; n++) {

297 if (true_vad[n] == 1) {	297 if (true_vad[n] == 1) {

298 total_active++;	298 total_active++;

299 if (previous_true_vad == 0) {	299 if (previous_true_vad == 0) {

300 num_onsets++;	300 num_onsets++;

301 onset = true;	301 onset = true;

302 }	302 }

303 if (agc_vad[n] == 0) {	303 if (agc_vad[n] == 0) {

304 total_missed_detection++;	304 total_missed_detection++;

305 if (onset)	305 if (onset)

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
375 " one probability per frame.\n"	375 " one probability per frame.\n"

376 "\nUsage:\n\n"	376 "\nUsage:\n\n"

377 "activity_metric input_pcm [options]\n"	377 "activity_metric input_pcm [options]\n"

378 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "	378 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "

379 "format.\n\n";	379 "format.\n\n";

380 google::SetUsageMessage(kUsage);	380 google::SetUsageMessage(kUsage);

381 google::ParseCommandLineFlags(&argc, &argv, true);	381 google::ParseCommandLineFlags(&argc, &argv, true);

382 webrtc::void_main(argc, argv);	382 webrtc::void_main(argc, argv);

383 return 0;	383 return 0;

384 }	384 }

OLD	NEW

« no previous file with comments | « webrtc/test/fake_audio_device.cc ('k') | webrtc/tools/agc/agc_manager.cc » ('j') | no next file with comments »