OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <stdio.h> | 11 #include <stdio.h> |
12 #include <iostream> | |
13 #include <sstream> | 12 #include <sstream> |
14 #include <string> | 13 #include <string> |
15 | 14 |
16 #include "gflags/gflags.h" | 15 #include "gflags/gflags.h" |
17 #include "webrtc/base/checks.h" | 16 #include "webrtc/base/checks.h" |
18 #include "webrtc/base/scoped_ptr.h" | 17 #include "webrtc/base/scoped_ptr.h" |
19 #include "webrtc/common_audio/channel_buffer.h" | 18 #include "webrtc/common_audio/channel_buffer.h" |
20 #include "webrtc/common_audio/wav_file.h" | 19 #include "webrtc/common_audio/wav_file.h" |
21 #include "webrtc/modules/audio_processing/include/audio_processing.h" | 20 #include "webrtc/modules/audio_processing/include/audio_processing.h" |
22 #include "webrtc/modules/audio_processing/test/audio_file_processor.h" | |
23 #include "webrtc/modules/audio_processing/test/protobuf_utils.h" | 21 #include "webrtc/modules/audio_processing/test/protobuf_utils.h" |
24 #include "webrtc/modules/audio_processing/test/test_utils.h" | 22 #include "webrtc/modules/audio_processing/test/test_utils.h" |
25 #include "webrtc/system_wrappers/include/tick_util.h" | 23 #include "webrtc/system_wrappers/include/tick_util.h" |
26 #include "webrtc/test/testsupport/trace_to_stderr.h" | 24 #include "webrtc/test/testsupport/trace_to_stderr.h" |
27 | 25 |
28 DEFINE_string(dump, "", "Name of the aecdump debug file to read from."); | 26 DEFINE_string(dump, "", "The name of the debug dump file to read from."); |
29 DEFINE_string(i, "", "Name of the capture input stream file to read from."); | 27 DEFINE_string(i, "", "The name of the input file to read from."); |
30 DEFINE_string( | 28 DEFINE_string(i_rev, "", "The name of the reverse input file to read from."); |
31 o, | 29 DEFINE_string(o, "out.wav", "Name of the output file to write to."); |
32 "out.wav", | 30 DEFINE_string(o_rev, |
33 "Name of the output file to write the processed capture stream to."); | 31 "out_rev.wav", |
34 DEFINE_int32(out_channels, 1, "Number of output channels."); | 32 "Name of the reverse output file to write to."); |
35 DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz."); | 33 DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input."); |
| 34 DEFINE_int32(out_sample_rate, 0, |
| 35 "Output sample rate in Hz. Defaults to input."); |
36 DEFINE_string(mic_positions, "", | 36 DEFINE_string(mic_positions, "", |
37 "Space delimited cartesian coordinates of microphones in meters. " | 37 "Space delimited cartesian coordinates of microphones in meters. " |
38 "The coordinates of each point are contiguous. " | 38 "The coordinates of each point are contiguous. " |
39 "For a two element array: \"x1 y1 z1 x2 y2 z2\""); | 39 "For a two element array: \"x1 y1 z1 x2 y2 z2\""); |
40 DEFINE_double( | 40 DEFINE_double(target_angle_degrees, 90, "The azimuth of the target in radians"); |
41 target_angle_degrees, | |
42 90, | |
43 "The azimuth of the target in degrees. Only applies to beamforming."); | |
44 | 41 |
45 DEFINE_bool(aec, false, "Enable echo cancellation."); | 42 DEFINE_bool(aec, false, "Enable echo cancellation."); |
46 DEFINE_bool(agc, false, "Enable automatic gain control."); | 43 DEFINE_bool(agc, false, "Enable automatic gain control."); |
47 DEFINE_bool(hpf, false, "Enable high-pass filtering."); | 44 DEFINE_bool(hpf, false, "Enable high-pass filtering."); |
48 DEFINE_bool(ns, false, "Enable noise suppression."); | 45 DEFINE_bool(ns, false, "Enable noise suppression."); |
49 DEFINE_bool(ts, false, "Enable transient suppression."); | 46 DEFINE_bool(ts, false, "Enable transient suppression."); |
50 DEFINE_bool(bf, false, "Enable beamforming."); | 47 DEFINE_bool(bf, false, "Enable beamforming."); |
51 DEFINE_bool(ie, false, "Enable intelligibility enhancer."); | 48 DEFINE_bool(ie, false, "Enable intelligibility enhancer."); |
52 DEFINE_bool(all, false, "Enable all components."); | 49 DEFINE_bool(all, false, "Enable all components."); |
53 | 50 |
54 DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3]."); | 51 DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3]."); |
55 | 52 |
56 DEFINE_bool(perf, false, "Enable performance tests."); | 53 DEFINE_bool(perf, false, "Enable performance tests."); |
57 | 54 |
58 namespace webrtc { | 55 namespace webrtc { |
59 namespace { | 56 namespace { |
60 | 57 |
61 const int kChunksPerSecond = 100; | 58 const int kChunksPerSecond = 100; |
62 const char kUsage[] = | 59 const char kUsage[] = |
63 "Command-line tool to run audio processing on WAV files. Accepts either\n" | 60 "Command-line tool to run audio processing on WAV files. Accepts either\n" |
64 "an input capture WAV file or protobuf debug dump and writes to an output\n" | 61 "an input capture WAV file or protobuf debug dump and writes to an output\n" |
65 "WAV file.\n" | 62 "WAV file.\n" |
66 "\n" | 63 "\n" |
67 "All components are disabled by default. If any bi-directional components\n" | 64 "All components are disabled by default. If any bi-directional components\n" |
68 "are enabled, only debug dump files are permitted."; | 65 "are enabled, only debug dump files are permitted."; |
69 | 66 |
| 67 // Returns a StreamConfig corresponding to wav_file if it's non-nullptr. |
| 68 // Otherwise returns a default initialized StreamConfig. |
| 69 StreamConfig MakeStreamConfig(const WavFile* wav_file) { |
| 70 if (wav_file) { |
| 71 return {wav_file->sample_rate(), wav_file->num_channels()}; |
| 72 } |
| 73 return {}; |
| 74 } |
| 75 |
70 } // namespace | 76 } // namespace |
71 | 77 |
72 int main(int argc, char* argv[]) { | 78 int main(int argc, char* argv[]) { |
73 google::SetUsageMessage(kUsage); | 79 google::SetUsageMessage(kUsage); |
74 google::ParseCommandLineFlags(&argc, &argv, true); | 80 google::ParseCommandLineFlags(&argc, &argv, true); |
75 | 81 |
76 if (!((FLAGS_i.empty()) ^ (FLAGS_dump.empty()))) { | 82 if (!((FLAGS_i.empty()) ^ (FLAGS_dump.empty()))) { |
77 fprintf(stderr, | 83 fprintf(stderr, |
78 "An input file must be specified with either -i or -dump.\n"); | 84 "An input file must be specified with either -i or -dump.\n"); |
79 return 1; | 85 return 1; |
80 } | 86 } |
81 if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) { | 87 if (!FLAGS_dump.empty()) { |
82 fprintf(stderr, "-aec and -ie require a -dump file.\n"); | 88 fprintf(stderr, "FIXME: the -dump option is not yet implemented.\n"); |
83 return 1; | |
84 } | |
85 if (FLAGS_ie) { | |
86 fprintf(stderr, | |
87 "FIXME(ajm): The intelligibility enhancer output is not dumped.\n"); | |
88 return 1; | 89 return 1; |
89 } | 90 } |
90 | 91 |
91 test::TraceToStderr trace_to_stderr(true); | 92 test::TraceToStderr trace_to_stderr(true); |
| 93 WavReader in_file(FLAGS_i); |
| 94 // If the output format is uninitialized, use the input format. |
| 95 const int out_channels = |
| 96 FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels(); |
| 97 const int out_sample_rate = |
| 98 FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate(); |
| 99 WavWriter out_file(FLAGS_o, out_sample_rate, out_channels); |
| 100 |
92 Config config; | 101 Config config; |
| 102 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); |
| 103 config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); |
| 104 |
93 if (FLAGS_bf || FLAGS_all) { | 105 if (FLAGS_bf || FLAGS_all) { |
94 if (FLAGS_mic_positions.empty()) { | 106 const size_t num_mics = in_file.num_channels(); |
95 fprintf(stderr, "-mic_positions must be specified when -bf is used.\n"); | 107 const std::vector<Point> array_geometry = |
96 return 1; | 108 ParseArrayGeometry(FLAGS_mic_positions, num_mics); |
97 } | 109 RTC_CHECK_EQ(array_geometry.size(), num_mics); |
| 110 |
98 config.Set<Beamforming>(new Beamforming( | 111 config.Set<Beamforming>(new Beamforming( |
99 true, ParseArrayGeometry(FLAGS_mic_positions), | 112 true, array_geometry, |
100 SphericalPointf(DegreesToRadians(FLAGS_target_angle_degrees), 0.f, | 113 SphericalPointf(DegreesToRadians(FLAGS_target_angle_degrees), 0.f, |
101 1.f))); | 114 1.f))); |
102 } | 115 } |
103 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); | |
104 config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); | |
105 | 116 |
106 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); | 117 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); |
107 RTC_CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); | 118 if (!FLAGS_dump.empty()) { |
| 119 RTC_CHECK_EQ(kNoErr, |
| 120 ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); |
| 121 } else if (FLAGS_aec) { |
| 122 fprintf(stderr, "-aec requires a -dump file.\n"); |
| 123 return -1; |
| 124 } |
| 125 bool process_reverse = !FLAGS_i_rev.empty(); |
108 RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all)); | 126 RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all)); |
| 127 RTC_CHECK_EQ(kNoErr, |
| 128 ap->gain_control()->set_mode(GainControl::kFixedDigital)); |
109 RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all)); | 129 RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all)); |
110 RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all)); | 130 RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all)); |
111 if (FLAGS_ns_level != -1) { | 131 if (FLAGS_ns_level != -1) |
112 RTC_CHECK_EQ(kNoErr, | 132 RTC_CHECK_EQ(kNoErr, |
113 ap->noise_suppression()->set_level( | 133 ap->noise_suppression()->set_level( |
114 static_cast<NoiseSuppression::Level>(FLAGS_ns_level))); | 134 static_cast<NoiseSuppression::Level>(FLAGS_ns_level))); |
115 } | 135 } |
116 ap->set_stream_key_pressed(FLAGS_ts); | 136 ap->set_stream_key_pressed(FLAGS_ts); |
117 | 137 |
118 rtc::scoped_ptr<AudioFileProcessor> processor; | 138 printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n", |
119 auto out_file = rtc_make_scoped_ptr( | 139 FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate()); |
120 new WavWriter(FLAGS_o, FLAGS_out_sample_rate, FLAGS_out_channels)); | 140 printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n", |
121 std::cout << FLAGS_o << ": " << out_file->FormatAsString() << std::endl; | 141 FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate()); |
122 if (FLAGS_dump.empty()) { | |
123 auto in_file = rtc_make_scoped_ptr(new WavReader(FLAGS_i)); | |
124 std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl; | |
125 processor.reset( | |
126 new WavFileProcessor(ap.Pass(), in_file.Pass(), out_file.Pass())); | |
127 | 142 |
128 } else { | 143 ChannelBuffer<float> in_buf( |
129 processor.reset(new AecDumpFileProcessor( | 144 rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond), |
130 ap.Pass(), fopen(FLAGS_dump.c_str(), "rb"), out_file.Pass())); | 145 in_file.num_channels()); |
| 146 ChannelBuffer<float> out_buf( |
| 147 rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond), |
| 148 out_file.num_channels()); |
| 149 |
| 150 std::vector<float> in_interleaved(in_buf.size()); |
| 151 std::vector<float> out_interleaved(out_buf.size()); |
| 152 |
| 153 rtc::scoped_ptr<WavReader> in_rev_file; |
| 154 rtc::scoped_ptr<WavWriter> out_rev_file; |
| 155 rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf; |
| 156 rtc::scoped_ptr<ChannelBuffer<float>> out_rev_buf; |
| 157 std::vector<float> in_rev_interleaved; |
| 158 std::vector<float> out_rev_interleaved; |
| 159 if (process_reverse) { |
| 160 in_rev_file.reset(new WavReader(FLAGS_i_rev)); |
| 161 out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(), |
| 162 in_rev_file->num_channels())); |
| 163 printf("In rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n", |
| 164 FLAGS_i_rev.c_str(), in_rev_file->num_channels(), |
| 165 in_rev_file->sample_rate()); |
| 166 printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n", |
| 167 FLAGS_o_rev.c_str(), out_rev_file->num_channels(), |
| 168 out_rev_file->sample_rate()); |
| 169 in_rev_buf.reset(new ChannelBuffer<float>( |
| 170 rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond), |
| 171 in_rev_file->num_channels())); |
| 172 in_rev_interleaved.resize(in_rev_buf->size()); |
| 173 out_rev_buf.reset(new ChannelBuffer<float>( |
| 174 rtc::CheckedDivExact(out_rev_file->sample_rate(), kChunksPerSecond), |
| 175 out_rev_file->num_channels())); |
| 176 out_rev_interleaved.resize(out_rev_buf->size()); |
131 } | 177 } |
132 | 178 |
| 179 TickTime processing_start_time; |
| 180 TickInterval accumulated_time; |
133 int num_chunks = 0; | 181 int num_chunks = 0; |
134 while (processor->ProcessChunk()) { | 182 |
| 183 const auto input_config = MakeStreamConfig(&in_file); |
| 184 const auto output_config = MakeStreamConfig(&out_file); |
| 185 const auto reverse_input_config = MakeStreamConfig(in_rev_file.get()); |
| 186 const auto reverse_output_config = MakeStreamConfig(out_rev_file.get()); |
| 187 |
| 188 while (in_file.ReadSamples(in_interleaved.size(), |
| 189 &in_interleaved[0]) == in_interleaved.size()) { |
| 190 // Have logs display the file time rather than wallclock time. |
135 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond); | 191 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond); |
136 ++num_chunks; | 192 FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(), |
| 193 &in_interleaved[0]); |
| 194 Deinterleave(&in_interleaved[0], in_buf.num_frames(), |
| 195 in_buf.num_channels(), in_buf.channels()); |
| 196 if (process_reverse) { |
| 197 in_rev_file->ReadSamples(in_rev_interleaved.size(), |
| 198 in_rev_interleaved.data()); |
| 199 FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(), |
| 200 in_rev_interleaved.data()); |
| 201 Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(), |
| 202 in_rev_buf->num_channels(), in_rev_buf->channels()); |
| 203 } |
| 204 |
| 205 if (FLAGS_perf) { |
| 206 processing_start_time = TickTime::Now(); |
| 207 } |
| 208 RTC_CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config, |
| 209 output_config, out_buf.channels())); |
| 210 if (process_reverse) { |
| 211 RTC_CHECK_EQ(kNoErr, ap->ProcessReverseStream( |
| 212 in_rev_buf->channels(), reverse_input_config, |
| 213 reverse_output_config, out_rev_buf->channels())); |
| 214 } |
| 215 if (FLAGS_perf) { |
| 216 accumulated_time += TickTime::Now() - processing_start_time; |
| 217 } |
| 218 |
| 219 Interleave(out_buf.channels(), out_buf.num_frames(), |
| 220 out_buf.num_channels(), &out_interleaved[0]); |
| 221 FloatToFloatS16(&out_interleaved[0], out_interleaved.size(), |
| 222 &out_interleaved[0]); |
| 223 out_file.WriteSamples(&out_interleaved[0], out_interleaved.size()); |
| 224 if (process_reverse) { |
| 225 Interleave(out_rev_buf->channels(), out_rev_buf->num_frames(), |
| 226 out_rev_buf->num_channels(), out_rev_interleaved.data()); |
| 227 FloatToFloatS16(out_rev_interleaved.data(), out_rev_interleaved.size(), |
| 228 out_rev_interleaved.data()); |
| 229 out_rev_file->WriteSamples(out_rev_interleaved.data(), |
| 230 out_rev_interleaved.size()); |
| 231 } |
| 232 num_chunks++; |
137 } | 233 } |
138 | |
139 if (FLAGS_perf) { | 234 if (FLAGS_perf) { |
140 const auto& proc_time = processor->proc_time(); | 235 int64_t execution_time_ms = accumulated_time.Milliseconds(); |
141 int64_t exec_time_us = proc_time.sum.Microseconds(); | 236 printf("\nExecution time: %.3f s\nFile time: %.2f s\n" |
142 printf( | 237 "Time per chunk: %.3f ms\n", |
143 "\nExecution time: %.3f s, File time: %.2f s\n" | 238 execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond, |
144 "Time per chunk (mean, max, min):\n%.0f us, %.0f us, %.0f us\n", | 239 execution_time_ms * 1.f / num_chunks); |
145 exec_time_us * 1e-6, num_chunks * 1.f / kChunksPerSecond, | |
146 exec_time_us * 1.f / num_chunks, 1.f * proc_time.max.Microseconds(), | |
147 1.f * proc_time.min.Microseconds()); | |
148 } | 240 } |
149 | |
150 return 0; | 241 return 0; |
151 } | 242 } |
152 | 243 |
153 } // namespace webrtc | 244 } // namespace webrtc |
154 | 245 |
155 int main(int argc, char* argv[]) { | 246 int main(int argc, char* argv[]) { |
156 return webrtc::main(argc, argv); | 247 return webrtc::main(argc, argv); |
157 } | 248 } |
OLD | NEW |