OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <stdio.h> | 11 #include <stdio.h> |
12 #include <sstream> | 12 #include <sstream> |
13 #include <string> | 13 #include <string> |
14 | 14 |
15 #include "gflags/gflags.h" | 15 #include "gflags/gflags.h" |
16 #include "webrtc/base/checks.h" | 16 #include "webrtc/base/checks.h" |
17 #include "webrtc/base/scoped_ptr.h" | 17 #include "webrtc/base/scoped_ptr.h" |
18 #include "webrtc/common_audio/channel_buffer.h" | 18 #include "webrtc/common_audio/channel_buffer.h" |
19 #include "webrtc/common_audio/wav_file.h" | 19 #include "webrtc/common_audio/wav_file.h" |
20 #include "webrtc/modules/audio_processing/include/audio_processing.h" | 20 #include "webrtc/modules/audio_processing/include/audio_processing.h" |
21 #include "webrtc/modules/audio_processing/test/audio_file_processor.h" | |
21 #include "webrtc/modules/audio_processing/test/protobuf_utils.h" | 22 #include "webrtc/modules/audio_processing/test/protobuf_utils.h" |
22 #include "webrtc/modules/audio_processing/test/test_utils.h" | 23 #include "webrtc/modules/audio_processing/test/test_utils.h" |
23 #include "webrtc/system_wrappers/interface/tick_util.h" | 24 #include "webrtc/system_wrappers/interface/tick_util.h" |
24 #include "webrtc/test/testsupport/trace_to_stderr.h" | 25 #include "webrtc/test/testsupport/trace_to_stderr.h" |
25 | 26 |
26 DEFINE_string(dump, "", "The name of the debug dump file to read from."); | 27 DEFINE_string(dump, "", "The name of the debug dump file to read from."); |
27 DEFINE_string(i, "", "The name of the input file to read from."); | 28 DEFINE_string(i, "", "The name of the input file to read from."); |
28 DEFINE_string(i_rev, "", "The name of the reverse input file to read from."); | |
29 DEFINE_string(o, "out.wav", "Name of the output file to write to."); | 29 DEFINE_string(o, "out.wav", "Name of the output file to write to."); |
peah-webrtc
2015/10/20 21:17:25
This description should be made more precise. For
Andrew MacDonald
2015/10/21 00:29:28
Done.
| |
30 DEFINE_string(o_rev, | 30 DEFINE_int32(out_channels, 1, "Number of output channels."); |
31 "out_rev.wav", | 31 DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz."); |
32 "Name of the reverse output file to write to."); | |
33 DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input."); | |
34 DEFINE_int32(out_sample_rate, 0, | |
35 "Output sample rate in Hz. Defaults to input."); | |
aluebs-webrtc
2015/10/24 00:53:34
Having the output format default to the input one
| |
36 DEFINE_string(mic_positions, "", | 32 DEFINE_string(mic_positions, "", |
37 "Space delimited cartesian coordinates of microphones in meters. " | 33 "Space delimited cartesian coordinates of microphones in meters. " |
38 "The coordinates of each point are contiguous. " | 34 "The coordinates of each point are contiguous. " |
39 "For a two element array: \"x1 y1 z1 x2 y2 z2\""); | 35 "For a two element array: \"x1 y1 z1 x2 y2 z2\""); |
40 | 36 |
41 DEFINE_bool(aec, false, "Enable echo cancellation."); | 37 DEFINE_bool(aec, false, "Enable echo cancellation."); |
42 DEFINE_bool(agc, false, "Enable automatic gain control."); | 38 DEFINE_bool(agc, false, "Enable automatic gain control."); |
43 DEFINE_bool(hpf, false, "Enable high-pass filtering."); | 39 DEFINE_bool(hpf, false, "Enable high-pass filtering."); |
44 DEFINE_bool(ns, false, "Enable noise suppression."); | 40 DEFINE_bool(ns, false, "Enable noise suppression."); |
45 DEFINE_bool(ts, false, "Enable transient suppression."); | 41 DEFINE_bool(ts, false, "Enable transient suppression."); |
(...skipping 10 matching lines...) Expand all Loading... | |
56 | 52 |
57 const int kChunksPerSecond = 100; | 53 const int kChunksPerSecond = 100; |
58 const char kUsage[] = | 54 const char kUsage[] = |
59 "Command-line tool to run audio processing on WAV files. Accepts either\n" | 55 "Command-line tool to run audio processing on WAV files. Accepts either\n" |
60 "an input capture WAV file or protobuf debug dump and writes to an output\n" | 56 "an input capture WAV file or protobuf debug dump and writes to an output\n" |
61 "WAV file.\n" | 57 "WAV file.\n" |
62 "\n" | 58 "\n" |
63 "All components are disabled by default. If any bi-directional components\n" | 59 "All components are disabled by default. If any bi-directional components\n" |
64 "are enabled, only debug dump files are permitted."; | 60 "are enabled, only debug dump files are permitted."; |
65 | 61 |
66 // Returns a StreamConfig corresponding to wav_file if it's non-nullptr. | |
67 // Otherwise returns a default initialized StreamConfig. | |
68 StreamConfig MakeStreamConfig(const WavFile* wav_file) { | |
69 if (wav_file) { | |
70 return {wav_file->sample_rate(), wav_file->num_channels()}; | |
71 } | |
72 return {}; | |
73 } | |
74 | |
75 } // namespace | 62 } // namespace |
76 | 63 |
77 int main(int argc, char* argv[]) { | 64 int main(int argc, char* argv[]) { |
78 google::SetUsageMessage(kUsage); | 65 google::SetUsageMessage(kUsage); |
79 google::ParseCommandLineFlags(&argc, &argv, true); | 66 google::ParseCommandLineFlags(&argc, &argv, true); |
80 | 67 |
81 if (!((FLAGS_i.empty()) ^ (FLAGS_dump.empty()))) { | 68 if (!((FLAGS_i.empty()) ^ (FLAGS_dump.empty()))) { |
82 fprintf(stderr, | 69 fprintf(stderr, |
83 "An input file must be specified with either -i or -dump.\n"); | 70 "An input file must be specified with either -i or -dump.\n"); |
84 return 1; | 71 return 1; |
85 } | 72 } |
86 if (!FLAGS_dump.empty()) { | 73 if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) { |
peah-webrtc
2015/10/20 21:17:25
Most likely, it would make sense here to try openi
Andrew MacDonald
2015/10/21 00:29:28
See comments below.
| |
87 fprintf(stderr, "FIXME: the -dump option is not yet implemented.\n"); | 74 fprintf(stderr, "-aec and -ie require a -dump file.\n"); |
75 return 1; | |
76 } | |
77 if (FLAGS_ts) { | |
78 fprintf(stderr, | |
79 "FIXME(ajm): The transient suppression output is not dumped.\n"); | |
aluebs-webrtc
2015/10/24 00:53:34
What TS output?
Andrew MacDonald
2015/10/29 00:44:50
TS processes the reverse (aka render) stream. Afte
aluebs-webrtc
2015/10/29 01:03:20
You mean the Intelligibility Enhancer?
Andrew MacDonald
2015/10/29 01:14:33
Yikes! Yes that's what I mean. Will fix.
Andrew MacDonald
2015/10/30 00:21:16
Done.
| |
88 return 1; | 80 return 1; |
89 } | 81 } |
90 | 82 |
91 test::TraceToStderr trace_to_stderr(true); | 83 test::TraceToStderr trace_to_stderr(true); |
92 WavReader in_file(FLAGS_i); | |
93 // If the output format is uninitialized, use the input format. | |
94 const int out_channels = | |
95 FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels(); | |
96 const int out_sample_rate = | |
97 FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate(); | |
98 WavWriter out_file(FLAGS_o, out_sample_rate, out_channels); | |
99 | |
100 Config config; | 84 Config config; |
85 if (FLAGS_bf || FLAGS_all) { | |
86 if (FLAGS_mic_positions.empty()) { | |
87 fprintf(stderr, "-mic_positions must be specified when -bf is used.\n"); | |
88 return 1; | |
89 } | |
90 config.Set<Beamforming>( | |
91 new Beamforming(true, ParseArrayGeometry(FLAGS_mic_positions))); | |
92 } | |
101 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); | 93 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); |
102 config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); | 94 config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); |
103 | 95 |
104 if (FLAGS_bf || FLAGS_all) { | |
105 const size_t num_mics = in_file.num_channels(); | |
106 const std::vector<Point> array_geometry = | |
107 ParseArrayGeometry(FLAGS_mic_positions, num_mics); | |
108 RTC_CHECK_EQ(array_geometry.size(), num_mics); | |
109 | |
110 config.Set<Beamforming>(new Beamforming(true, array_geometry)); | |
111 } | |
112 | |
113 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); | 96 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); |
114 if (!FLAGS_dump.empty()) { | 97 RTC_CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); |
115 RTC_CHECK_EQ(kNoErr, | |
116 ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); | |
117 } else if (FLAGS_aec) { | |
118 fprintf(stderr, "-aec requires a -dump file.\n"); | |
119 return -1; | |
120 } | |
121 bool process_reverse = !FLAGS_i_rev.empty(); | |
122 RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all)); | 98 RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all)); |
123 RTC_CHECK_EQ(kNoErr, | 99 RTC_CHECK_EQ(kNoErr, |
124 ap->gain_control()->set_mode(GainControl::kFixedDigital)); | 100 ap->gain_control()->set_mode(GainControl::kFixedDigital)); |
peah-webrtc
2015/10/20 21:17:25
Here I think we also need to set the analog gain c
Andrew MacDonald
2015/10/21 00:29:28
Thanks for noticing this. It actually has no effec
peah-webrtc
2015/10/21 08:10:04
Great! Did not know that.
| |
125 RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all)); | 101 RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all)); |
126 RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all)); | 102 RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all)); |
127 if (FLAGS_ns_level != -1) | 103 if (FLAGS_ns_level != -1) { |
128 RTC_CHECK_EQ(kNoErr, | 104 RTC_CHECK_EQ(kNoErr, |
129 ap->noise_suppression()->set_level( | 105 ap->noise_suppression()->set_level( |
130 static_cast<NoiseSuppression::Level>(FLAGS_ns_level))); | 106 static_cast<NoiseSuppression::Level>(FLAGS_ns_level))); |
131 | |
132 printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n", | |
133 FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate()); | |
134 printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n", | |
135 FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate()); | |
aluebs-webrtc
2015/10/24 00:53:34
Why remove these? I thought it was a nice featur
Andrew MacDonald
2015/10/29 00:44:50
OK, added back.
| |
136 | |
137 ChannelBuffer<float> in_buf( | |
138 rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond), | |
139 in_file.num_channels()); | |
140 ChannelBuffer<float> out_buf( | |
141 rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond), | |
142 out_file.num_channels()); | |
143 | |
144 std::vector<float> in_interleaved(in_buf.size()); | |
145 std::vector<float> out_interleaved(out_buf.size()); | |
146 | |
147 rtc::scoped_ptr<WavReader> in_rev_file; | |
148 rtc::scoped_ptr<WavWriter> out_rev_file; | |
149 rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf; | |
150 rtc::scoped_ptr<ChannelBuffer<float>> out_rev_buf; | |
151 std::vector<float> in_rev_interleaved; | |
152 std::vector<float> out_rev_interleaved; | |
153 if (process_reverse) { | |
154 in_rev_file.reset(new WavReader(FLAGS_i_rev)); | |
155 out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(), | |
156 in_rev_file->num_channels())); | |
157 printf("In rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n", | |
158 FLAGS_i_rev.c_str(), in_rev_file->num_channels(), | |
159 in_rev_file->sample_rate()); | |
160 printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n", | |
161 FLAGS_o_rev.c_str(), out_rev_file->num_channels(), | |
162 out_rev_file->sample_rate()); | |
163 in_rev_buf.reset(new ChannelBuffer<float>( | |
164 rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond), | |
165 in_rev_file->num_channels())); | |
166 in_rev_interleaved.resize(in_rev_buf->size()); | |
167 out_rev_buf.reset(new ChannelBuffer<float>( | |
168 rtc::CheckedDivExact(out_rev_file->sample_rate(), kChunksPerSecond), | |
169 out_rev_file->num_channels())); | |
170 out_rev_interleaved.resize(out_rev_buf->size()); | |
171 } | 107 } |
172 | 108 |
173 TickTime processing_start_time; | 109 rtc::scoped_ptr<AudioFileProcessor> processor; |
174 TickInterval accumulated_time; | 110 auto out_file = rtc_make_scoped_ptr( |
111 new WavWriter(FLAGS_o, FLAGS_out_sample_rate, FLAGS_out_channels)); | |
112 if (FLAGS_dump.empty()) { | |
113 auto in_file = rtc_make_scoped_ptr(new WavReader(FLAGS_i)); | |
114 processor.reset( | |
115 new WavFileProcessor(ap.Pass(), in_file.Pass(), out_file.Pass())); | |
116 | |
117 } else { | |
118 processor.reset(new AecDumpFileProcessor( | |
119 ap.Pass(), fopen(FLAGS_dump.c_str(), "rb"), out_file.Pass())); | |
peah-webrtc
2015/10/20 21:17:25
I think this fopen should be done outside the cons
Andrew MacDonald
2015/10/21 00:29:28
There are a few advantages to doing it this way:
1
peah-webrtc
2015/10/21 08:10:04
Those are very valid points! And the error message
| |
120 } | |
121 | |
175 int num_chunks = 0; | 122 int num_chunks = 0; |
123 while (processor->ProcessChunk()) { | |
peah-webrtc
2015/10/20 21:17:25
I'd actually rather put this while loop in the pro
Andrew MacDonald
2015/10/21 00:29:28
I wasn't planning to even permit running such scen
peah-webrtc
2015/10/21 08:10:04
Hmm, maybe that is a good way to do it. It make
Andrew MacDonald
2015/10/22 00:12:09
This is almost exactly what I have in this CL, exc
peah-webrtc
2015/10/22 04:50:40
Answers inline below:
Andrew MacDonald
2015/10/22 05:11:55
Correct.
peah-webrtc
2015/10/22 14:38:30
Ok, I yield :-). It is indeed as you point out a o
Andrew MacDonald
2015/10/22 16:38:20
:-)
| |
124 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond); | |
125 ++num_chunks; | |
126 } | |
176 | 127 |
177 const auto input_config = MakeStreamConfig(&in_file); | 128 if (FLAGS_perf) { |
178 const auto output_config = MakeStreamConfig(&out_file); | 129 int64_t execution_time_ms = processor->processing_time_ms(); |
peah-webrtc
2015/10/20 21:17:25
It would be nice also to have the max and min comp
Andrew MacDonald
2015/10/21 00:29:28
Good point, done.
I typically don't think the max
peah-webrtc
2015/10/21 08:10:04
That makes sense. If we can get a histogram, that
| |
179 const auto reverse_input_config = MakeStreamConfig(in_rev_file.get()); | 130 printf( |
180 const auto reverse_output_config = MakeStreamConfig(out_rev_file.get()); | 131 "\nExecution time: %.3f s\nFile time: %.2f s\n" |
132 "Time per chunk: %.3f ms\n", | |
133 execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond, | |
134 execution_time_ms * 1.f / num_chunks); | |
135 } | |
181 | 136 |
182 while (in_file.ReadSamples(in_interleaved.size(), | |
183 &in_interleaved[0]) == in_interleaved.size()) { | |
184 // Have logs display the file time rather than wallclock time. | |
185 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond); | |
186 FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(), | |
187 &in_interleaved[0]); | |
188 Deinterleave(&in_interleaved[0], in_buf.num_frames(), | |
189 in_buf.num_channels(), in_buf.channels()); | |
190 if (process_reverse) { | |
191 in_rev_file->ReadSamples(in_rev_interleaved.size(), | |
192 in_rev_interleaved.data()); | |
193 FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(), | |
194 in_rev_interleaved.data()); | |
195 Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(), | |
196 in_rev_buf->num_channels(), in_rev_buf->channels()); | |
197 } | |
198 | |
199 if (FLAGS_perf) { | |
200 processing_start_time = TickTime::Now(); | |
201 } | |
202 RTC_CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config, | |
203 output_config, out_buf.channels())); | |
204 if (process_reverse) { | |
205 RTC_CHECK_EQ(kNoErr, ap->ProcessReverseStream( | |
206 in_rev_buf->channels(), reverse_input_config, | |
207 reverse_output_config, out_rev_buf->channels())); | |
208 } | |
209 if (FLAGS_perf) { | |
210 accumulated_time += TickTime::Now() - processing_start_time; | |
211 } | |
212 | |
213 Interleave(out_buf.channels(), out_buf.num_frames(), | |
214 out_buf.num_channels(), &out_interleaved[0]); | |
215 FloatToFloatS16(&out_interleaved[0], out_interleaved.size(), | |
216 &out_interleaved[0]); | |
217 out_file.WriteSamples(&out_interleaved[0], out_interleaved.size()); | |
218 if (process_reverse) { | |
219 Interleave(out_rev_buf->channels(), out_rev_buf->num_frames(), | |
220 out_rev_buf->num_channels(), out_rev_interleaved.data()); | |
221 FloatToFloatS16(out_rev_interleaved.data(), out_rev_interleaved.size(), | |
222 out_rev_interleaved.data()); | |
223 out_rev_file->WriteSamples(out_rev_interleaved.data(), | |
224 out_rev_interleaved.size()); | |
225 } | |
226 num_chunks++; | |
227 } | |
228 if (FLAGS_perf) { | |
229 int64_t execution_time_ms = accumulated_time.Milliseconds(); | |
230 printf("\nExecution time: %.3f s\nFile time: %.2f s\n" | |
231 "Time per chunk: %.3f ms\n", | |
232 execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond, | |
233 execution_time_ms * 1.f / num_chunks); | |
234 } | |
235 return 0; | 137 return 0; |
236 } | 138 } |
237 | 139 |
238 } // namespace webrtc | 140 } // namespace webrtc |
239 | 141 |
240 int main(int argc, char* argv[]) { | 142 int main(int argc, char* argv[]) { |
241 return webrtc::main(argc, argv); | 143 return webrtc::main(argc, argv); |
242 } | 144 } |
OLD | NEW |