| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 | 55 |
| 56 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a | 56 // Add some dither to quiet frames. This avoids the ExtractFeatures skip a |
| 57 // silence frame. Otherwise true VAD would drift with respect to the audio. | 57 // silence frame. Otherwise true VAD would drift with respect to the audio. |
| 58 // We only consider mono inputs. | 58 // We only consider mono inputs. |
| 59 static void DitherSilence(AudioFrame* frame) { | 59 static void DitherSilence(AudioFrame* frame) { |
| 60 ASSERT_EQ(1u, frame->num_channels_); | 60 ASSERT_EQ(1u, frame->num_channels_); |
| 61 const double kRmsSilence = 5; | 61 const double kRmsSilence = 5; |
| 62 const double sum_squared_silence = kRmsSilence * kRmsSilence * | 62 const double sum_squared_silence = kRmsSilence * kRmsSilence * |
| 63 frame->samples_per_channel_; | 63 frame->samples_per_channel_; |
| 64 double sum_squared = 0; | 64 double sum_squared = 0; |
| 65 int16_t* frame_data = frame->mutable_data(); |
| 65 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 66 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
| 66 sum_squared += frame->data_[n] * frame->data_[n]; | 67 sum_squared += frame_data[n] * frame_data[n]; |
| 67 if (sum_squared <= sum_squared_silence) { | 68 if (sum_squared <= sum_squared_silence) { |
| 68 for (size_t n = 0; n < frame->samples_per_channel_; n++) | 69 for (size_t n = 0; n < frame->samples_per_channel_; n++) |
| 69 frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. | 70 frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. |
| 70 } | 71 } |
| 71 } | 72 } |
| 72 | 73 |
| 73 class AgcStat { | 74 class AgcStat { |
| 74 public: | 75 public: |
| 75 AgcStat() | 76 AgcStat() |
| 76 : video_index_(0), | 77 : video_index_(0), |
| 77 activity_threshold_(kDefaultActivityThreshold), | 78 activity_threshold_(kDefaultActivityThreshold), |
| 78 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), | 79 audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
| 79 audio_processing_(new VadAudioProc()), | 80 audio_processing_(new VadAudioProc()), |
| (...skipping 16 matching lines...) Expand all Loading... |
| 96 | 97 |
| 97 int AddAudio(const AudioFrame& frame, double p_video, | 98 int AddAudio(const AudioFrame& frame, double p_video, |
| 98 int* combined_vad) { | 99 int* combined_vad) { |
| 99 if (frame.num_channels_ != 1 || | 100 if (frame.num_channels_ != 1 || |
| 100 frame.samples_per_channel_ != | 101 frame.samples_per_channel_ != |
| 101 kSampleRateHz / 100 || | 102 kSampleRateHz / 100 || |
| 102 frame.sample_rate_hz_ != kSampleRateHz) | 103 frame.sample_rate_hz_ != kSampleRateHz) |
| 103 return -1; | 104 return -1; |
| 104 video_vad_[video_index_++] = p_video; | 105 video_vad_[video_index_++] = p_video; |
| 105 AudioFeatures features; | 106 AudioFeatures features; |
| 107 const int16_t* frame_data = frame.data(); |
| 106 audio_processing_->ExtractFeatures( | 108 audio_processing_->ExtractFeatures( |
| 107 frame.data_, frame.samples_per_channel_, &features); | 109 frame_data, frame.samples_per_channel_, &features); |
| 108 if (FLAGS_standalone_vad) { | 110 if (FLAGS_standalone_vad) { |
| 109 standalone_vad_->AddAudio(frame.data_, | 111 standalone_vad_->AddAudio(frame_data, |
| 110 frame.samples_per_channel_); | 112 frame.samples_per_channel_); |
| 111 } | 113 } |
| 112 if (features.num_frames > 0) { | 114 if (features.num_frames > 0) { |
| 113 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; | 115 double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; |
| 114 if (FLAGS_standalone_vad) { | 116 if (FLAGS_standalone_vad) { |
| 115 standalone_vad_->GetActivity(p, kMaxNumFrames); | 117 standalone_vad_->GetActivity(p, kMaxNumFrames); |
| 116 } | 118 } |
| 117 // TODO(turajs) combining and limiting are used in the source files as | 119 // TODO(turajs) combining and limiting are used in the source files as |
| 118 // well they can be moved to utility. | 120 // well they can be moved to utility. |
| 119 // Combine Video and stand-alone VAD. | 121 // Combine Video and stand-alone VAD. |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 244 int total_missed_detection = 0; | 246 int total_missed_detection = 0; |
| 245 int onset_adaptation = 0; | 247 int onset_adaptation = 0; |
| 246 int num_onsets = 0; | 248 int num_onsets = 0; |
| 247 bool onset = false; | 249 bool onset = false; |
| 248 uint8_t previous_true_vad = 0; | 250 uint8_t previous_true_vad = 0; |
| 249 int num_not_adapted = 0; | 251 int num_not_adapted = 0; |
| 250 size_t true_vad_index = 0; | 252 size_t true_vad_index = 0; |
| 251 bool in_false_positive_region = false; | 253 bool in_false_positive_region = false; |
| 252 int total_false_positive_duration = 0; | 254 int total_false_positive_duration = 0; |
| 253 bool video_adapted = false; | 255 bool video_adapted = false; |
| 254 while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), | 256 while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), |
| 255 kSamplesToRead, pcm_fid)) { | 257 kSamplesToRead, pcm_fid)) { |
| 256 assert(true_vad_index < kMaxNumFrames); | 258 assert(true_vad_index < kMaxNumFrames); |
| 257 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, | 259 ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
| 258 true_vad_fid)) | 260 true_vad_fid)) |
| 259 << "Size mismatch between True-VAD and the PCM file.\n"; | 261 << "Size mismatch between True-VAD and the PCM file.\n"; |
| 260 if (video_vad_fid != NULL) { | 262 if (video_vad_fid != NULL) { |
| 261 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << | 263 ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
| 262 "Not enough video-based VAD probabilities."; | 264 "Not enough video-based VAD probabilities."; |
| 263 } | 265 } |
| 264 | 266 |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 376 " one probability per frame.\n" | 378 " one probability per frame.\n" |
| 377 "\nUsage:\n\n" | 379 "\nUsage:\n\n" |
| 378 "activity_metric input_pcm [options]\n" | 380 "activity_metric input_pcm [options]\n" |
| 379 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " | 381 "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " |
| 380 "format.\n\n"; | 382 "format.\n\n"; |
| 381 google::SetUsageMessage(kUsage); | 383 google::SetUsageMessage(kUsage); |
| 382 google::ParseCommandLineFlags(&argc, &argv, true); | 384 google::ParseCommandLineFlags(&argc, &argv, true); |
| 383 webrtc::void_main(argc, argv); | 385 webrtc::void_main(argc, argv); |
| 384 return 0; | 386 return 0; |
| 385 } | 387 } |
| OLD | NEW |