webrtc/tools/agc/activity_metric.cc - Issue 2965593002: Move webrtc/{tools => rtc_tools}

Unified Diff: webrtc/tools/agc/activity_metric.cc

Issue 2965593002: Move webrtc/{tools => rtc_tools} (Closed)

Patch Set: Adding back root changes Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: webrtc/tools/agc/activity_metric.cc

diff --git a/webrtc/tools/agc/activity_metric.cc b/webrtc/tools/agc/activity_metric.cc

deleted file mode 100644

index 8ea193913cb43a736dcb2adbc9f85b6c422ffa1d..0000000000000000000000000000000000000000

--- a/webrtc/tools/agc/activity_metric.cc

+++ /dev/null

@@ -1,395 +0,0 @@

-/*

- *

- * Use of this source code is governed by a BSD-style license

- * that can be found in the LICENSE file in the root of the source

- * tree. An additional intellectual property rights grant can be found

- * in the file PATENTS. All contributing project authors may

- * be found in the AUTHORS file in the root of the source tree.

- */

-#include <math.h>

-#include <stdio.h>

-#include <stdlib.h>

-#include <algorithm>

-#include <memory>

-#include "webrtc/base/flags.h"

-#include "webrtc/base/safe_minmax.h"

-#include "webrtc/modules/audio_processing/agc/agc.h"

-#include "webrtc/modules/audio_processing/agc/loudness_histogram.h"

-#include "webrtc/modules/audio_processing/agc/utility.h"

-#include "webrtc/modules/audio_processing/vad/common.h"

-#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"

-#include "webrtc/modules/audio_processing/vad/standalone_vad.h"

-#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"

-#include "webrtc/modules/include/module_common_types.h"

-#include "webrtc/test/gtest.h"

-static const int kAgcAnalWindowSamples = 100;

-static const float kDefaultActivityThreshold = 0.3f;

-DEFINE_bool(standalone_vad, true, "enable stand-alone VAD");

-DEFINE_string(true_vad, "", "name of a file containing true VAD in 'int'"

- " format");

-DEFINE_string(video_vad, "", "name of a file containing video VAD (activity"

- " probabilities) in double format. One activity per 10ms is"

- " required. If no file is given the video information is not"

- " incorporated. Negative activity is interpreted as video is"

- " not adapted and the statistics are not computed during"

- " the learning phase. Note that the negative video activities"

- " are ONLY allowed at the beginning.");

-DEFINE_string(result, "", "name of a file to write the results. The results"

- " will be appended to the end of the file. This is optional.");

-DEFINE_string(audio_content, "", "name of a file where audio content is written"

- " to, in double format.");

-DEFINE_float(activity_threshold, kDefaultActivityThreshold,

- "Activity threshold");

-DEFINE_bool(help, false, "prints this message");

-namespace webrtc {

-// TODO(turajs) A new CL will be committed soon where ExtractFeatures will

-// notify the caller of "silence" input, instead of bailing out. We would not

-// need the following function when such a change is made.

-// Add some dither to quiet frames. This keeps ExtractFeatures from skipping a

-// silent frame. Otherwise true VAD would drift with respect to the audio.

-// We only consider mono inputs.

-static void DitherSilence(AudioFrame* frame) {

- ASSERT_EQ(1u, frame->num_channels_);

- const double kRmsSilence = 5;

- const double sum_squared_silence = kRmsSilence * kRmsSilence *

- frame->samples_per_channel_;

- double sum_squared = 0;

- int16_t* frame_data = frame->mutable_data();

- for (size_t n = 0; n < frame->samples_per_channel_; n++)

- sum_squared += frame_data[n] * frame_data[n];

- if (sum_squared <= sum_squared_silence) {

- for (size_t n = 0; n < frame->samples_per_channel_; n++)

- frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe.

- }

-class AgcStat {

- public:

- AgcStat()

- : video_index_(0),

- activity_threshold_(kDefaultActivityThreshold),

- audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)),

- audio_processing_(new VadAudioProc()),

- vad_(new PitchBasedVad()),

- standalone_vad_(StandaloneVad::Create()),

- audio_content_fid_(NULL) {

- for (size_t n = 0; n < kMaxNumFrames; n++)

- video_vad_[n] = 0.5;

- }

- ~AgcStat() {

- if (audio_content_fid_ != NULL) {

- fclose(audio_content_fid_);

- }

- void set_audio_content_file(FILE* audio_content_fid) {

- audio_content_fid_ = audio_content_fid;

- }

- int AddAudio(const AudioFrame& frame, double p_video,

- int* combined_vad) {

- if (frame.num_channels_ != 1 ||

- frame.samples_per_channel_ !=

- kSampleRateHz / 100 ||

- frame.sample_rate_hz_ != kSampleRateHz)

- return -1;

- video_vad_[video_index_++] = p_video;

- AudioFeatures features;

- const int16_t* frame_data = frame.data();

- audio_processing_->ExtractFeatures(

- frame_data, frame.samples_per_channel_, &features);

- if (FLAG_standalone_vad) {

- standalone_vad_->AddAudio(frame_data,

- frame.samples_per_channel_);

- }

- if (features.num_frames > 0) {

- double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5};

- if (FLAG_standalone_vad) {

- standalone_vad_->GetActivity(p, kMaxNumFrames);

- }

- // TODO(turajs) combining and limiting are also used in the source files;

- // they can be moved to utility.

- // Combine Video and stand-alone VAD.

- for (size_t n = 0; n < features.num_frames; n++) {

- double p_active = p[n] * video_vad_[n];

- double p_passive = (1 - p[n]) * (1 - video_vad_[n]);

- p[n] = rtc::SafeClamp(p_active / (p_active + p_passive), 0.01, 0.99);

- }

- if (vad_->VoicingProbability(features, p) < 0)

- return -1;

- for (size_t n = 0; n < features.num_frames; n++) {

- audio_content_->Update(features.rms[n], p[n]);

- double ac = audio_content_->AudioContent();

- if (audio_content_fid_ != NULL) {

- fwrite(&ac, sizeof(ac), 1, audio_content_fid_);

- }

- if (ac > kAgcAnalWindowSamples * activity_threshold_) {

- combined_vad[n] = 1;

- } else {

- combined_vad[n] = 0;

- }

- video_index_ = 0;

- }

- return static_cast<int>(features.num_frames);

- }

- void Reset() {

- audio_content_->Reset();

- }

- void SetActivityThreshold(double activity_threshold) {

- activity_threshold_ = activity_threshold;

- }

- private:

- int video_index_;

- double activity_threshold_;

- double video_vad_[kMaxNumFrames];

- std::unique_ptr<LoudnessHistogram> audio_content_;

- std::unique_ptr<VadAudioProc> audio_processing_;

- std::unique_ptr<PitchBasedVad> vad_;

- std::unique_ptr<StandaloneVad> standalone_vad_;

- FILE* audio_content_fid_;

-};

-void void_main(int argc, char* argv[]) {

- webrtc::AgcStat agc_stat;

- FILE* pcm_fid = fopen(argv[1], "rb");

- ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1];

- if (argc < 2) {

- fprintf(stderr, "\nNot Enough arguments\n");

- }

- FILE* true_vad_fid = NULL;

- ASSERT_GT(strlen(FLAG_true_vad), 0u) << "Specify the file containing true "

- "VADs using --true_vad flag.";

- true_vad_fid = fopen(FLAG_true_vad, "rb");

- ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " <<

- FLAG_true_vad;

- FILE* results_fid = NULL;

- if (strlen(FLAG_result) > 0) {

- // True if this is the first time writing to this file and we add a

- // header to the beginning of the file.

- bool write_header;

- // Open in the read mode. If it fails, the file doesn't exist and a header

- // has to be written for it. Otherwise no need to write a header.

- results_fid = fopen(FLAG_result, "r");

- if (results_fid == NULL) {

- write_header = true;

- } else {

- fclose(results_fid);

- write_header = false;

- }

- // Open in append mode.

- results_fid = fopen(FLAG_result, "a");

- ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " <<

- FLAG_result << ", to write the results.";

- // Write the header if required.

- if (write_header) {

- fprintf(results_fid, "%% Total Active, Misdetection, "

- "Total inactive, False Positive, On-sets, Missed segments, "

- "Average response\n");

- }

- FILE* video_vad_fid = NULL;

- if (strlen(FLAG_video_vad) > 0) {

- video_vad_fid = fopen(FLAG_video_vad, "rb");

- ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " <<

- FLAG_video_vad << " to read video-based VAD decisions.\n";

- }

- // AgcStat will be the owner of this file and will close it at its

- // destructor.

- FILE* audio_content_fid = NULL;

- if (strlen(FLAG_audio_content) > 0) {

- audio_content_fid = fopen(FLAG_audio_content, "wb");

- ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " <<

- FLAG_audio_content << " to write audio-content.\n";

- agc_stat.set_audio_content_file(audio_content_fid);

- }

- webrtc::AudioFrame frame;

- frame.num_channels_ = 1;

- frame.sample_rate_hz_ = 16000;

- frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;

- const size_t kSamplesToRead = frame.num_channels_ *

- frame.samples_per_channel_;

- agc_stat.SetActivityThreshold(FLAG_activity_threshold);

- int ret_val = 0;

- int num_frames = 0;

- int agc_vad[kMaxNumFrames];

- uint8_t true_vad[kMaxNumFrames];

- double p_video = 0.5;

- int total_active = 0;

- int total_passive = 0;

- int total_false_positive = 0;

- int total_missed_detection = 0;

- int onset_adaptation = 0;

- int num_onsets = 0;

- bool onset = false;

- uint8_t previous_true_vad = 0;

- int num_not_adapted = 0;

- size_t true_vad_index = 0;

- bool in_false_positive_region = false;

- int total_false_positive_duration = 0;

- bool video_adapted = false;

- while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t),

- kSamplesToRead, pcm_fid)) {

- assert(true_vad_index < kMaxNumFrames);

- ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,

- true_vad_fid))

- << "Size mismatch between True-VAD and the PCM file.\n";

- if (video_vad_fid != NULL) {

- ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) <<

- "Not enough video-based VAD probabilities.";

- }

- // Negative video activity indicates that the video-based VAD is not yet

- // adapted. Disregards the learning phase in statistics.

- if (p_video < 0) {

- if (video_adapted) {

- fprintf(stderr, "Negative video probabilities ONLY allowed at the "

- "beginning of the sequence, not in the middle.\n");

- exit(1);

- }

- continue;

- } else {

- video_adapted = true;

- }

- num_frames++;

- uint8_t last_true_vad;

- if (true_vad_index == 0) {

- last_true_vad = previous_true_vad;

- } else {

- last_true_vad = true_vad[true_vad_index - 1];

- }

- if (last_true_vad == 1 && true_vad[true_vad_index] == 0) {

- agc_stat.Reset();

- }

- true_vad_index++;

- DitherSilence(&frame);

- ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);

- ASSERT_GE(ret_val, 0);

- if (ret_val > 0) {

- ASSERT_EQ(true_vad_index, static_cast<size_t>(ret_val));

- for (int n = 0; n < ret_val; n++) {

- if (true_vad[n] == 1) {

- total_active++;

- if (previous_true_vad == 0) {

- num_onsets++;

- onset = true;

- }

- if (agc_vad[n] == 0) {

- total_missed_detection++;

- if (onset)

- onset_adaptation++;

- } else {

- in_false_positive_region = false;

- onset = false;

- }

- } else if (true_vad[n] == 0) {

- // Check if the |onset| flag is still up. If so, it means that we totally

- // missed an active region.

- if (onset)

- num_not_adapted++;

- onset = false;

- total_passive++;

- if (agc_vad[n] == 1) {

- total_false_positive++;

- in_false_positive_region = true;

- }

- if (in_false_positive_region) {

- total_false_positive_duration++;

- }

- } else {

- ASSERT_TRUE(false) << "Invalid value for true-VAD.\n";

- }

- previous_true_vad = true_vad[n];

- }

- true_vad_index = 0;

- }

- if (results_fid != NULL) {

- fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n",

- total_active,

- total_missed_detection,

- total_passive,

- total_false_positive,

- num_onsets,

- num_not_adapted,

- static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),

- static_cast<float>(total_false_positive_duration) /

- (total_passive + 1e-12));

- }

- fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n",

- total_active,

- total_missed_detection,

- total_passive,

- total_false_positive,

- num_onsets,

- num_not_adapted,

- static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),

- static_cast<float>(total_false_positive_duration) /

- (total_passive + 1e-12));

- fclose(true_vad_fid);

- fclose(pcm_fid);

- if (video_vad_fid != NULL) {

- fclose(video_vad_fid);

- }

- if (results_fid != NULL) {

- fclose(results_fid);

- }

-} // namespace webrtc

-int main(int argc, char* argv[]) {

- if (argc == 1) {

- // Print usage information.

- std::cout <<

- "\nCompute the number of misdetected and false-positive frames. Not\n"

- " that for each frame of audio (10 ms) there should be one true\n"

- " activity. If any video-based activity is given, there should also be\n"

- " one probability per frame.\n"

- "Run with --help for more details on available flags.\n"

- "\nUsage:\n\n"

- "activity_metric input_pcm [options]\n"

- "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits "

- "format.\n\n";

- return 0;

- }

- rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true);

- if (FLAG_help) {

- rtc::FlagList::Print(nullptr, false);

- return 0;

- }

- webrtc::void_main(argc, argv);

- return 0;

« no previous file with comments | « webrtc/tools/OWNERS ('k') | webrtc/tools/author_line_count.sh » ('j') | no next file with comments »