OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
56 | 56 |
57 // Add some dither to quiet frames. This keeps ExtractFeatures from skipping a | 57 // Add some dither to quiet frames. This keeps ExtractFeatures from skipping a |
58 // silent frame; otherwise the true VAD would drift with respect to the audio. | 58 // silent frame; otherwise the true VAD would drift with respect to the audio. |
59 // We only consider mono inputs. | 59 // We only consider mono inputs. |
60 static void DitherSilence(AudioFrame* frame) { | 60 static void DitherSilence(AudioFrame* frame) { |
61 ASSERT_EQ(1u, frame->num_channels_); | 61 ASSERT_EQ(1u, frame->num_channels_); |
62 const double kRmsSilence = 5; | 62 const double kRmsSilence = 5; |
63 const double sum_squared_silence = kRmsSilence * kRmsSilence * | 63 const double sum_squared_silence = kRmsSilence * kRmsSilence * |
64 frame->samples_per_channel_; | 64 frame->samples_per_channel_; |
65 double sum_squared = 0; | 65 double sum_squared = 0; |
| 66 int16_t* frame_data = frame->mutable_data(); |
66 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 67 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
67 sum_squared += frame->data_[n] * frame->data_[n]; | 68 sum_squared += frame_data[n] * frame_data[n]; |
68 if (sum_squared <= sum_squared_silence) { | 69 if (sum_squared <= sum_squared_silence) { |
69 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 70 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
70 frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. | 71 frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. |
71 } | 72 } |
72 } | 73 } |
73 | 74 |
74 class AgcStat { | 75 class AgcStat { |
75 public: | 76 public: |
76 AgcStat() | 77 AgcStat() |
77 : video_index_(0), | 78 : video_index_(0), |
78 activity_threshold_(kDefaultActivityThreshold), | 79 activity_threshold_(kDefaultActivityThreshold), |
79 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 80 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
80 audio_processing_(new VadAudioProc()), | 81 audio_processing_(new VadAudioProc()), |
(...skipping 16 matching lines...) Expand all Loading... |
97 | 98 |
98 int AddAudio(const AudioFrame& frame, double p_video, | 99 int AddAudio(const AudioFrame& frame, double p_video, |
99 int* combined_vad) { | 100 int* combined_vad) { |
100 if (frame.num_channels_ != 1 || | 101 if (frame.num_channels_ != 1 || |
101 frame.samples_per_channel_ != | 102 frame.samples_per_channel_ != |
102 kSampleRateHz / 100 || | 103 kSampleRateHz / 100 || |
103 frame.sample_rate_hz_ != kSampleRateHz) | 104 frame.sample_rate_hz_ != kSampleRateHz) |
104 return -1; | 105 return -1; |
105 video_vad_[video_index_++] = p_video; | 106 video_vad_[video_index_++] = p_video; |
106 AudioFeatures features; | 107 AudioFeatures features; |
| 108 const int16_t* frame_data = frame.data(); |
107 audio_processing_->ExtractFeatures( | 109 audio_processing_->ExtractFeatures( |
108 frame.data_, frame.samples_per_channel_, &features); | 110 frame_data, frame.samples_per_channel_, &features); |
109 if (FLAG_standalone_vad) { | 111 if (FLAG_standalone_vad) { |
110 standalone_vad_->AddAudio(frame.data_, | 112 standalone_vad_->AddAudio(frame_data, |
111 frame.samples_per_channel_); | 113 frame.samples_per_channel_); |
112 } | 114 } |
113 if (features.num_frames > 0) { | 115 if (features.num_frames > 0) { |
114 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; | 116 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; |
115 if (FLAG_standalone_vad) { | 117 if (FLAG_standalone_vad) { |
116 standalone_vad_->GetActivity(p, kMaxNumFrames); | 118 standalone_vad_->GetActivity(p, kMaxNumFrames); |
117 } | 119 } |
118 // TODO(turajs): combining and limiting are also used in the source files; | 120 // TODO(turajs): combining and limiting are also used in the source files; |
119 // they can be moved to a utility. | 121 // they can be moved to a utility. |
120 // Combine Video and stand-alone VAD. | 122 // Combine Video and stand-alone VAD. |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
245 int total_missed_detection = 0; | 247 int total_missed_detection = 0; |
246 int onset_adaptation = 0; | 248 int onset_adaptation = 0; |
247 int num_onsets = 0; | 249 int num_onsets = 0; |
248 bool onset = false; | 250 bool onset = false; |
249 uint8_t previous_true_vad = 0; | 251 uint8_t previous_true_vad = 0; |
250 int num_not_adapted = 0; | 252 int num_not_adapted = 0; |
251 size_t true_vad_index = 0; | 253 size_t true_vad_index = 0; |
252 bool in_false_positive_region = false; | 254 bool in_false_positive_region = false; |
253 int total_false_positive_duration = 0; | 255 int total_false_positive_duration = 0; |
254 bool video_adapted = false; | 256 bool video_adapted = false; |
255 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 257 while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), |
256 kSamplesToRead, pcm_fid)) { | 258 kSamplesToRead, pcm_fid)) { |
257 assert(true_vad_index < kMaxNumFrames); | 259 assert(true_vad_index < kMaxNumFrames); |
258 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 260 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
259 true_vad_fid)) | 261 true_vad_fid)) |
260 << "Size mismatch between True-VAD and the PCM file.\n"; | 262 << "Size mismatch between True-VAD and the PCM file.\n"; |
261 if (video_vad_fid != NULL) { | 263 if (video_vad_fid != NULL) { |
262 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 264 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
263 "Not enough video-based VAD probabilities."; | 265 "Not enough video-based VAD probabilities."; |
264 } | 266 } |
265 | 267 |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
385 return 0; | 387 return 0; |
386 } | 388 } |
387 rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true); | 389 rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true); |
388 if (FLAG_help) { | 390 if (FLAG_help) { |
389 rtc::FlagList::Print(nullptr, false); | 391 rtc::FlagList::Print(nullptr, false); |
390 return 0; | 392 return 0; |
391 } | 393 } |
392 webrtc::void_main(argc, argv); | 394 webrtc::void_main(argc, argv); |
393 return 0; | 395 return 0; |
394 } | 396 } |
OLD | NEW |