| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 56 | 56 |
| 57 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a | 57 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a |
| 58 // silence frame. Otherwise true VAD would drift with respect to the audio. | 58 // silence frame. Otherwise true VAD would drift with respect to the audio. |
| 59 // We only consider mono inputs. | 59 // We only consider mono inputs. |
| 60 static void DitherSilence(AudioFrame* frame) { | 60 static void DitherSilence(AudioFrame* frame) { |
| 61 ASSERT_EQ(1u, frame->num_channels_); | 61 ASSERT_EQ(1u, frame->num_channels_); |
| 62 const double kRmsSilence = 5; | 62 const double kRmsSilence = 5; |
| 63 const double sum_squared_silence = kRmsSilence * kRmsSilence * | 63 const double sum_squared_silence = kRmsSilence * kRmsSilence * |
| 64 frame->samples_per_channel_; | 64 frame->samples_per_channel_; |
| 65 double sum_squared = 0; | 65 double sum_squared = 0; |
| 66 int16_t* frame_data = frame->mutable_data(); |
| 66 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 67 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
| 67 sum_squared += frame->data_[n] * frame->data_[n]; | 68 sum_squared += frame_data[n] * frame_data[n]; |
| 68 if (sum_squared <= sum_squared_silence) { | 69 if (sum_squared <= sum_squared_silence) { |
| 69 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 70 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
| 70 frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. | 71 frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. |
| 71 } | 72 } |
| 72 } | 73 } |
| 73 | 74 |
| 74 class AgcStat { | 75 class AgcStat { |
| 75 public: | 76 public: |
| 76 AgcStat() | 77 AgcStat() |
| 77 : video_index_(0), | 78 : video_index_(0), |
| 78 activity_threshold_(kDefaultActivityThreshold), | 79 activity_threshold_(kDefaultActivityThreshold), |
| 79 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 80 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
| 80 audio_processing_(new VadAudioProc()), | 81 audio_processing_(new VadAudioProc()), |
| (...skipping 16 matching lines...) Expand all Loading... |
| 97 | 98 |
| 98 int AddAudio(const AudioFrame& frame, double p_video, | 99 int AddAudio(const AudioFrame& frame, double p_video, |
| 99 int* combined_vad) { | 100 int* combined_vad) { |
| 100 if (frame.num_channels_ != 1 || | 101 if (frame.num_channels_ != 1 || |
| 101 frame.samples_per_channel_ != | 102 frame.samples_per_channel_ != |
| 102 kSampleRateHz / 100 || | 103 kSampleRateHz / 100 || |
| 103 frame.sample_rate_hz_ != kSampleRateHz) | 104 frame.sample_rate_hz_ != kSampleRateHz) |
| 104 return -1; | 105 return -1; |
| 105 video_vad_[video_index_++] = p_video; | 106 video_vad_[video_index_++] = p_video; |
| 106 AudioFeatures features; | 107 AudioFeatures features; |
| 108 const int16_t* frame_data = frame.data(); |
| 107 audio_processing_->ExtractFeatures( | 109 audio_processing_->ExtractFeatures( |
| 108 frame.data_, frame.samples_per_channel_, &features); | 110 frame_data, frame.samples_per_channel_, &features); |
| 109 if (FLAG_standalone_vad) { | 111 if (FLAG_standalone_vad) { |
| 110 standalone_vad_->AddAudio(frame.data_, | 112 standalone_vad_->AddAudio(frame_data, |
| 111 frame.samples_per_channel_); | 113 frame.samples_per_channel_); |
| 112 } | 114 } |
| 113 if (features.num_frames > 0) { | 115 if (features.num_frames > 0) { |
| 114 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; | 116 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; |
| 115 if (FLAG_standalone_vad) { | 117 if (FLAG_standalone_vad) { |
| 116 standalone_vad_->GetActivity(p, kMaxNumFrames); | 118 standalone_vad_->GetActivity(p, kMaxNumFrames); |
| 117 } | 119 } |
| 118 // TODO(turajs) combining and limiting are used in the source files as | 120 // TODO(turajs) combining and limiting are used in the source files as |
| 119 // well they can be moved to utility. | 121 // well they can be moved to utility. |
| 120 // Combine Video and stand-alone VAD. | 122 // Combine Video and stand-alone VAD. |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 245 int total_missed_detection = 0; | 247 int total_missed_detection = 0; |
| 246 int onset_adaptation = 0; | 248 int onset_adaptation = 0; |
| 247 int num_onsets = 0; | 249 int num_onsets = 0; |
| 248 bool onset = false; | 250 bool onset = false; |
| 249 uint8_t previous_true_vad = 0; | 251 uint8_t previous_true_vad = 0; |
| 250 int num_not_adapted = 0; | 252 int num_not_adapted = 0; |
| 251 size_t true_vad_index = 0; | 253 size_t true_vad_index = 0; |
| 252 bool in_false_positive_region = false; | 254 bool in_false_positive_region = false; |
| 253 int total_false_positive_duration = 0; | 255 int total_false_positive_duration = 0; |
| 254 bool video_adapted = false; | 256 bool video_adapted = false; |
| 255 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 257 while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), |
| 256 kSamplesToRead, pcm_fid)) { | 258 kSamplesToRead, pcm_fid)) { |
| 257 assert(true_vad_index < kMaxNumFrames); | 259 assert(true_vad_index < kMaxNumFrames); |
| 258 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 260 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
| 259 true_vad_fid)) | 261 true_vad_fid)) |
| 260 << "Size mismatch between True-VAD and the PCM file.\n"; | 262 << "Size mismatch between True-VAD and the PCM file.\n"; |
| 261 if (video_vad_fid != NULL) { | 263 if (video_vad_fid != NULL) { |
| 262 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 264 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
| 263 "Not enough video-based VAD probabilities."; | 265 "Not enough video-based VAD probabilities."; |
| 264 } | 266 } |
| 265 | 267 |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 385 return 0; | 387 return 0; |
| 386 } | 388 } |
| 387 rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true); | 389 rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true); |
| 388 if (FLAG_help) { | 390 if (FLAG_help) { |
| 389 rtc::FlagList::Print(nullptr, false); | 391 rtc::FlagList::Print(nullptr, false); |
| 390 return 0; | 392 return 0; |
| 391 } | 393 } |
| 392 webrtc::void_main(argc, argv); | 394 webrtc::void_main(argc, argv); |
| 393 return 0; | 395 return 0; |
| 394 } | 396 } |
| OLD | NEW |