OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
55 | 55 |
56 // Add some dither to quiet frames. This avoids ExtractFeatures skipping a | 56 // Add some dither to quiet frames. This avoids ExtractFeatures skipping a |
57 // silence frame. Otherwise true VAD would drift with respect to the audio. | 57 // silence frame. Otherwise true VAD would drift with respect to the audio. |
58 // We only consider mono inputs. | 58 // We only consider mono inputs. |
59 static void DitherSilence(AudioFrame* frame) { | 59 static void DitherSilence(AudioFrame* frame) { |
60 ASSERT_EQ(1u, frame->num_channels_); | 60 ASSERT_EQ(1u, frame->num_channels_); |
61 const double kRmsSilence = 5; | 61 const double kRmsSilence = 5; |
62 const double sum_squared_silence = kRmsSilence * kRmsSilence * | 62 const double sum_squared_silence = kRmsSilence * kRmsSilence * |
63 frame->samples_per_channel_; | 63 frame->samples_per_channel_; |
64 double sum_squared = 0; | 64 double sum_squared = 0; |
| 65 int16_t* frame_data = frame->mutable_data(); |
65 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 66 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
66 sum_squared += frame->data_[n] * frame->data_[n]; | 67 sum_squared += frame_data[n] * frame_data[n]; |
67 if (sum_squared <= sum_squared_silence) { | 68 if (sum_squared <= sum_squared_silence) { |
68 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 69 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
69 frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. | 70 frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. |
70 } | 71 } |
71 } | 72 } |
72 | 73 |
73 class AgcStat { | 74 class AgcStat { |
74 public: | 75 public: |
75 AgcStat() | 76 AgcStat() |
76 : video_index_(0), | 77 : video_index_(0), |
77 activity_threshold_(kDefaultActivityThreshold), | 78 activity_threshold_(kDefaultActivityThreshold), |
78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 79 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
79 audio_processing_(new VadAudioProc()), | 80 audio_processing_(new VadAudioProc()), |
(...skipping 16 matching lines...) Expand all Loading... |
96 | 97 |
97 int AddAudio(const AudioFrame& frame, double p_video, | 98 int AddAudio(const AudioFrame& frame, double p_video, |
98 int* combined_vad) { | 99 int* combined_vad) { |
99 if (frame.num_channels_ != 1 || | 100 if (frame.num_channels_ != 1 || |
100 frame.samples_per_channel_ != | 101 frame.samples_per_channel_ != |
101 kSampleRateHz / 100 || | 102 kSampleRateHz / 100 || |
102 frame.sample_rate_hz_ != kSampleRateHz) | 103 frame.sample_rate_hz_ != kSampleRateHz) |
103 return -1; | 104 return -1; |
104 video_vad_[video_index_++] = p_video; | 105 video_vad_[video_index_++] = p_video; |
105 AudioFeatures features; | 106 AudioFeatures features; |
| 107 const int16_t* frame_data = frame.data(); |
106 audio_processing_->ExtractFeatures( | 108 audio_processing_->ExtractFeatures( |
107 frame.data_, frame.samples_per_channel_, &features); | 109 frame_data, frame.samples_per_channel_, &features); |
108 if (FLAGS_standalone_vad) { | 110 if (FLAGS_standalone_vad) { |
109 standalone_vad_->AddAudio(frame.data_, | 111 standalone_vad_->AddAudio(frame_data, |
110 frame.samples_per_channel_); | 112 frame.samples_per_channel_); |
111 } | 113 } |
112 if (features.num_frames > 0) { | 114 if (features.num_frames > 0) { |
113 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; | 115 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; |
114 if (FLAGS_standalone_vad) { | 116 if (FLAGS_standalone_vad) { |
115 standalone_vad_->GetActivity(p, kMaxNumFrames); | 117 standalone_vad_->GetActivity(p, kMaxNumFrames); |
116 } | 118 } |
117 // TODO(turajs) combining and limiting are used in the source files as | 119 // TODO(turajs) combining and limiting are used in the source files as |
118 // well; they can be moved to a utility. | 120 // well; they can be moved to a utility. |
119 // Combine Video and stand-alone VAD. | 121 // Combine Video and stand-alone VAD. |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 int total_missed_detection = 0; | 246 int total_missed_detection = 0; |
245 int onset_adaptation = 0; | 247 int onset_adaptation = 0; |
246 int num_onsets = 0; | 248 int num_onsets = 0; |
247 bool onset = false; | 249 bool onset = false; |
248 uint8_t previous_true_vad = 0; | 250 uint8_t previous_true_vad = 0; |
249 int num_not_adapted = 0; | 251 int num_not_adapted = 0; |
250 size_t true_vad_index = 0; | 252 size_t true_vad_index = 0; |
251 bool in_false_positive_region = false; | 253 bool in_false_positive_region = false; |
252 int total_false_positive_duration = 0; | 254 int total_false_positive_duration = 0; |
253 bool video_adapted = false; | 255 bool video_adapted = false; |
254 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 256 while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), |
255 kSamplesToRead, pcm_fid)) { | 257 kSamplesToRead, pcm_fid)) { |
256 assert(true_vad_index < kMaxNumFrames); | 258 assert(true_vad_index < kMaxNumFrames); |
257 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 259 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
258 true_vad_fid)) | 260 true_vad_fid)) |
259 << "Size mismatch between True-VAD and the PCM file.\n"; | 261 << "Size mismatch between True-VAD and the PCM file.\n"; |
260 if (video_vad_fid != NULL) { | 262 if (video_vad_fid != NULL) { |
261 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 263 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
262 "Not enough video-based VAD probabilities."; | 264 "Not enough video-based VAD probabilities."; |
263 } | 265 } |
264 | 266 |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
376 " one probability per frame.\n" | 378 " one probability per frame.\n" |
377 "\nUsage:\n\n" | 379 "\nUsage:\n\n" |
378 "activity_metric input_pcm [options]\n" | 380 "activity_metric input_pcm [options]\n" |
379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " | 381 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " |
380 "format.\n\n"; | 382 "format.\n\n"; |
381 google::SetUsageMessage(kUsage); | 383 google::SetUsageMessage(kUsage); |
382 google::ParseCommandLineFlags(&argc, &argv, true); | 384 google::ParseCommandLineFlags(&argc, &argv, true); |
383 webrtc::void_main(argc, argv); | 385 webrtc::void_main(argc, argv); |
384 return 0; | 386 return 0; |
385 } | 387 } |
OLD | NEW |