Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc

Issue 1693823004: Use VAD to get a better speech power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@pow
Patch Set: Use f for float Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 #include "webrtc/common_audio/wav_file.h" 23 #include "webrtc/common_audio/wav_file.h"
24 #include "webrtc/modules/audio_processing/audio_buffer.h" 24 #include "webrtc/modules/audio_processing/audio_buffer.h"
25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" 25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" 26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
27 27
28 using std::complex; 28 using std::complex;
29 29
30 namespace webrtc { 30 namespace webrtc {
31 namespace { 31 namespace {
32 32
33 DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data.");
34 DEFINE_int32(sample_rate,
35 16000,
36 "Audio sample rate used in the input and output files.");
37 DEFINE_int32(ana_rate,
38 60,
39 "Analysis rate; gains recalculated every N blocks.");
40 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");
41
42 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); 33 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
43 DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); 34 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
44 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); 35 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
45 36
46 const size_t kNumChannels = 1;
47
48 // void function for gtest 37 // void function for gtest
49 void void_main(int argc, char* argv[]) { 38 void void_main(int argc, char* argv[]) {
50 google::SetUsageMessage( 39 google::SetUsageMessage(
51 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); 40 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
52 google::ParseCommandLineFlags(&argc, &argv, true); 41 google::ParseCommandLineFlags(&argc, &argv, true);
53 42
54 size_t samples; // Number of samples in input PCM file
55 size_t fragment_size; // Number of samples to process at a time
56 // to simulate APM stream processing
57
58 // Load settings and wav input. 43 // Load settings and wav input.
59
60 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.
61 // Duplicates chunk_length_ in
62 // IntelligibilityEnhancer.
63
64 struct stat in_stat, noise_stat; 44 struct stat in_stat, noise_stat;
65 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0) 45 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)
66 << "Empty speech file."; 46 << "Empty speech file.";
67 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0) 47 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)
68 << "Empty noise file."; 48 << "Empty noise file.";
69 49
70 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2; 50 const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;
71 51
72 WavReader in_file(FLAGS_clear_file); 52 WavReader in_file(FLAGS_clear_file);
73 std::vector<float> in_fpcm(samples); 53 std::vector<float> in_fpcm(samples);
74 in_file.ReadSamples(samples, &in_fpcm[0]); 54 in_file.ReadSamples(samples, &in_fpcm[0]);
75 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]); 55 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]);
76 56
77 WavReader noise_file(FLAGS_noise_file); 57 WavReader noise_file(FLAGS_noise_file);
78 std::vector<float> noise_fpcm(samples); 58 std::vector<float> noise_fpcm(samples);
79 noise_file.ReadSamples(samples, &noise_fpcm[0]); 59 noise_file.ReadSamples(samples, &noise_fpcm[0]);
80 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]); 60 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]);
81 61
82 // Run intelligibility enhancement. 62 // Run intelligibility enhancement.
83 IntelligibilityEnhancer::Config config; 63 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());
84 config.sample_rate_hz = FLAGS_sample_rate;
85 config.decay_rate = static_cast<float>(FLAGS_clear_alpha);
86 config.analysis_rate = FLAGS_ana_rate;
87 config.gain_change_limit = FLAGS_gain_limit;
88 IntelligibilityEnhancer enh(config);
89 rtc::CriticalSection crit; 64 rtc::CriticalSection crit;
90 NoiseSuppressionImpl ns(&crit); 65 NoiseSuppressionImpl ns(&crit);
91 ns.Initialize(kNumChannels, FLAGS_sample_rate); 66 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
92 ns.Enable(true); 67 ns.Enable(true);
93 68
94 AudioBuffer capture_audio(fragment_size, 69 // Mirror real time APM chunk size. Duplicates chunk_length_ in
95 kNumChannels, 70 // IntelligibilityEnhancer.
96 fragment_size, 71 size_t fragment_size = in_file.sample_rate() / 100;
97 kNumChannels, 72 AudioBuffer capture_audio(fragment_size, noise_file.num_channels(),
73 fragment_size, noise_file.num_channels(),
98 fragment_size); 74 fragment_size);
99 StreamConfig stream_config(FLAGS_sample_rate, kNumChannels); 75 StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels());
100 76
101 // Slice the input into smaller chunks, as the APM would do, and feed them 77 // Slice the input into smaller chunks, as the APM would do, and feed them
102 // through the enhancer. 78 // through the enhancer.
103 float* clear_cursor = &in_fpcm[0]; 79 float* clear_cursor = &in_fpcm[0];
104 float* noise_cursor = &noise_fpcm[0]; 80 float* noise_cursor = &noise_fpcm[0];
105 81
106 for (size_t i = 0; i < samples; i += fragment_size) { 82 for (size_t i = 0; i < samples; i += fragment_size) {
107 capture_audio.CopyFrom(&noise_cursor, stream_config); 83 capture_audio.CopyFrom(&noise_cursor, stream_config);
108 ns.AnalyzeCaptureAudio(&capture_audio); 84 ns.AnalyzeCaptureAudio(&capture_audio);
109 ns.ProcessCaptureAudio(&capture_audio); 85 ns.ProcessCaptureAudio(&capture_audio);
110 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate()); 86 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());
111 enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels); 87 enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(),
88 in_file.num_channels());
112 clear_cursor += fragment_size; 89 clear_cursor += fragment_size;
113 noise_cursor += fragment_size; 90 noise_cursor += fragment_size;
114 } 91 }
115 92
116 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]); 93 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]);
117 94
118 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels); 95 WavWriter out_file(FLAGS_out_file,
96 in_file.sample_rate(),
97 in_file.num_channels());
119 out_file.WriteSamples(&in_fpcm[0], samples); 98 out_file.WriteSamples(&in_fpcm[0], samples);
120 } 99 }
121 100
122 } // namespace 101 } // namespace
123 } // namespace webrtc 102 } // namespace webrtc
124 103
125 int main(int argc, char* argv[]) { 104 int main(int argc, char* argv[]) {
126 webrtc::void_main(argc, argv); 105 webrtc::void_main(argc, argv);
127 return 0; 106 return 0;
128 } 107 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698