webrtc/modules/audio_processing/test/audioproc_float.cc - Issue 1409943002: Add aecdump support to audioproc_f.

Unified Diff: webrtc/modules/audio_processing/test/audioproc_float.cc

Issue 1409943002: Add aecdump support to audioproc_f. (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« webrtc/modules/audio_processing/test/audio_file_processor.cc ('K') | « webrtc/modules/audio_processing/test/audio_file_processor.cc ('k') | webrtc/modules/audio_processing/test/process_test.cc » ('j') | webrtc/modules/audio_processing/test/test_utils.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: webrtc/modules/audio_processing/test/audioproc_float.cc

diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc

index 9c44d76ecc29fb404a91630fd7f5efe1247fe988..7bdcea622ecd8ffe9639d9b2dfb4d8b8e3ec88be 100644

--- a/webrtc/modules/audio_processing/test/audioproc_float.cc

+++ b/webrtc/modules/audio_processing/test/audioproc_float.cc

@@ -18,6 +18,7 @@

#include "webrtc/common_audio/channel_buffer.h"

#include "webrtc/common_audio/wav_file.h"

#include "webrtc/modules/audio_processing/include/audio_processing.h"

+#include "webrtc/modules/audio_processing/test/audio_file_processor.h"

#include "webrtc/modules/audio_processing/test/protobuf_utils.h"

#include "webrtc/modules/audio_processing/test/test_utils.h"

#include "webrtc/system_wrappers/interface/tick_util.h"

@@ -25,14 +26,9 @@

DEFINE_string(dump, "", "The name of the debug dump file to read from.");

DEFINE_string(i, "", "The name of the input file to read from.");

-DEFINE_string(i_rev, "", "The name of the reverse input file to read from.");

DEFINE_string(o, "out.wav", "Name of the output file to write to.");

peah-webrtc 2015/10/20 21:17:25 This description should be made more precise. For

Andrew MacDonald 2015/10/21 00:29:28 Done.

-DEFINE_string(o_rev,

- "out_rev.wav",

- "Name of the reverse output file to write to.");

-DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input.");

-DEFINE_int32(out_sample_rate, 0,

- "Output sample rate in Hz. Defaults to input.");

aluebs-webrtc 2015/10/24 00:53:34 Having the output format default to the input one

+DEFINE_int32(out_channels, 1, "Number of output channels.");

+DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz.");

DEFINE_string(mic_positions, "",

"Space delimited cartesian coordinates of microphones in meters. "

"The coordinates of each point are contiguous. "

@@ -63,15 +59,6 @@ const char kUsage[] =

"All components are disabled by default. If any bi-directional components\n"

"are enabled, only debug dump files are permitted.";

-// Returns a StreamConfig corresponding to wav_file if it's non-nullptr.

-// Otherwise returns a default initialized StreamConfig.

-StreamConfig MakeStreamConfig(const WavFile* wav_file) {

- if (wav_file) {

- return {wav_file->sample_rate(), wav_file->num_channels()};

- }

- return {};

} // namespace

int main(int argc, char* argv[]) {

@@ -83,155 +70,70 @@ int main(int argc, char* argv[]) {

"An input file must be specified with either -i or -dump.\n");

return 1;

}

- if (!FLAGS_dump.empty()) {

- fprintf(stderr, "FIXME: the -dump option is not yet implemented.\n");

+ if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) {

peah-webrtc 2015/10/20 21:17:25 Most likely, it would make sense here to try openi

Andrew MacDonald 2015/10/21 00:29:28 See comments below.

+ fprintf(stderr, "-aec and -ie require a -dump file.\n");

+ return 1;

+ }

+ if (FLAGS_ts) {

+ fprintf(stderr,

+ "FIXME(ajm): The transient suppression output is not dumped.\n");

aluebs-webrtc 2015/10/24 00:53:34 What TS output?

Andrew MacDonald 2015/10/29 00:44:50 TS processes the reverse (aka render) stream. Afte

aluebs-webrtc 2015/10/29 01:03:20 You mean the Intelligibility Enhancer?

Andrew MacDonald 2015/10/29 01:14:33 Yikes! Yes that's what I mean. Will fix.

Andrew MacDonald 2015/10/30 00:21:16 Done.

return 1;

}

test::TraceToStderr trace_to_stderr(true);

- WavReader in_file(FLAGS_i);

- // If the output format is uninitialized, use the input format.

- const int out_channels =

- FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels();

- const int out_sample_rate =

- FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate();

- WavWriter out_file(FLAGS_o, out_sample_rate, out_channels);

Config config;

- config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));

- config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all));

if (FLAGS_bf || FLAGS_all) {

- const size_t num_mics = in_file.num_channels();

- const std::vector<Point> array_geometry =

- ParseArrayGeometry(FLAGS_mic_positions, num_mics);

- RTC_CHECK_EQ(array_geometry.size(), num_mics);

- config.Set<Beamforming>(new Beamforming(true, array_geometry));

+ if (FLAGS_mic_positions.empty()) {

+ fprintf(stderr, "-mic_positions must be specified when -bf is used.\n");

+ return 1;

+ }

+ config.Set<Beamforming>(

+ new Beamforming(true, ParseArrayGeometry(FLAGS_mic_positions)));

}

+ config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));

+ config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all));

rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));

- if (!FLAGS_dump.empty()) {

- RTC_CHECK_EQ(kNoErr,

- ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all));

- } else if (FLAGS_aec) {

- fprintf(stderr, "-aec requires a -dump file.\n");

- return -1;

- }

- bool process_reverse = !FLAGS_i_rev.empty();

+ RTC_CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all));

RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all));

RTC_CHECK_EQ(kNoErr,

ap->gain_control()->set_mode(GainControl::kFixedDigital));

peah-webrtc 2015/10/20 21:17:25 Here I think we also need to set the analog gain c

Andrew MacDonald 2015/10/21 00:29:28 Thanks for noticing this. It actually has no effec

peah-webrtc 2015/10/21 08:10:04 Great! Did not know that.

RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all));

RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all));

- if (FLAGS_ns_level != -1)

+ if (FLAGS_ns_level != -1) {

RTC_CHECK_EQ(kNoErr,

ap->noise_suppression()->set_level(

static_cast<NoiseSuppression::Level>(FLAGS_ns_level)));

+ }

- printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

- FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate());

- printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

- FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());

aluebs-webrtc 2015/10/24 00:53:34 Why removing these? I thought it was a nice featur

Andrew MacDonald 2015/10/29 00:44:50 OK, added back.

- ChannelBuffer<float> in_buf(

- rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),

- in_file.num_channels());

- ChannelBuffer<float> out_buf(

- rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),

- out_file.num_channels());

- std::vector<float> in_interleaved(in_buf.size());

- std::vector<float> out_interleaved(out_buf.size());

- rtc::scoped_ptr<WavReader> in_rev_file;

- rtc::scoped_ptr<WavWriter> out_rev_file;

- rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf;

- rtc::scoped_ptr<ChannelBuffer<float>> out_rev_buf;

- std::vector<float> in_rev_interleaved;

- std::vector<float> out_rev_interleaved;

- if (process_reverse) {

- in_rev_file.reset(new WavReader(FLAGS_i_rev));

- out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(),

- in_rev_file->num_channels()));

- printf("In rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

- FLAGS_i_rev.c_str(), in_rev_file->num_channels(),

- in_rev_file->sample_rate());

- printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

- FLAGS_o_rev.c_str(), out_rev_file->num_channels(),

- out_rev_file->sample_rate());

- in_rev_buf.reset(new ChannelBuffer<float>(

- rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond),

- in_rev_file->num_channels()));

- in_rev_interleaved.resize(in_rev_buf->size());

- out_rev_buf.reset(new ChannelBuffer<float>(

- rtc::CheckedDivExact(out_rev_file->sample_rate(), kChunksPerSecond),

- out_rev_file->num_channels()));

- out_rev_interleaved.resize(out_rev_buf->size());

+ rtc::scoped_ptr<AudioFileProcessor> processor;

+ auto out_file = rtc_make_scoped_ptr(

+ new WavWriter(FLAGS_o, FLAGS_out_sample_rate, FLAGS_out_channels));

+ if (FLAGS_dump.empty()) {

+ auto in_file = rtc_make_scoped_ptr(new WavReader(FLAGS_i));

+ processor.reset(

+ new WavFileProcessor(ap.Pass(), in_file.Pass(), out_file.Pass()));

+ } else {

+ processor.reset(new AecDumpFileProcessor(

+ ap.Pass(), fopen(FLAGS_dump.c_str(), "rb"), out_file.Pass()));

peah-webrtc 2015/10/20 21:17:25 I think this fopen should be done outside the cons

Andrew MacDonald 2015/10/21 00:29:28 There are a few advantages to doing it this way: 1

peah-webrtc 2015/10/21 08:10:04 Those are very valid points! And the error message

}

- TickTime processing_start_time;

- TickInterval accumulated_time;

int num_chunks = 0;

- const auto input_config = MakeStreamConfig(&in_file);

- const auto output_config = MakeStreamConfig(&out_file);

- const auto reverse_input_config = MakeStreamConfig(in_rev_file.get());

- const auto reverse_output_config = MakeStreamConfig(out_rev_file.get());

- while (in_file.ReadSamples(in_interleaved.size(),

- &in_interleaved[0]) == in_interleaved.size()) {

- // Have logs display the file time rather than wallclock time.

+ while (processor->ProcessChunk()) {

peah-webrtc 2015/10/20 21:17:25 I'd actually rather put this while loop in the pro

Andrew MacDonald 2015/10/21 00:29:28 I wasn't planning to even permit running such scen

peah-webrtc 2015/10/21 08:10:04 Hmm, maybe that is a good way to do it. It make

Andrew MacDonald 2015/10/22 00:12:09 This is almost exactly what I have in this CL, exc

This is almost exactly what I have in this CL, except that I'm calling a single chunk method in a loop rather than processing the whole file in one shot. I did it this way because I expected there to be more shared functionality happening in the loop here. As it turns out, we only need to count the chunks and update the trace time override. I'd prefer to leave it this way for now. It's easy to refactor into a one-shot method if it becomes clearly superior in the future.

My proposal for a simulation -> aecdump converter tool wouldn't repeat any of the command-line flags from here. Essentially, you would provide it capture and render files and some parameters about delay and distribution of calls to render and capture and it would spit out an aecdump file. I think we should add support in audioproc_f for passing in capture and render files and enabling AEC and intelligibility enhancement (IE) processing of them. But it shouldn't accept delay, clock drift, order of calls etc. If you want that fanciness, you should have to provide an aecdump.

peah-webrtc 2015/10/22 04:50:40 Answers inline below:

On 2015/10/22 00:12:09, Andrew MacDonald wrote: > On 2015/10/21 08:10:04, peah-webrtc wrote: > > Hmm, my maybe that is a good way to do it. It makes sense but my main concern > is > > that two boilerplate codes of command line parsing will then need to be > > maintained. > > > > My suggestion would be to have one common file that contains two layers, one > for > > parsing the command line and does any sanity checks/timer initialization/etc. > > This then selectively calls two methods in the second layer, one for the > aecdump > > and one for wav-file-based-call synthesis. > > This is almost exactly what I have in this CL, except that I'm calling a single > chunk method in a loop rather than processing the whole file in one shot. I did > it this way because I expected there to be more shared functionality happening > in the loop here. As it turns out, we only need to count the chunks and update > the trace time override. > > I'd prefer to leave it this way for now. It's easy to refactor into a one-shot > method if it becomes clearly superior in the future. > > > > > In my mind the unwieldyness comes from > > -having to support the command line arguments and the different settings, but > > that we need anyway for both. > > -needing to synthesize the call chain, but that we can separate into a > different > > file. > > > > The main drawback to having two tools for this is that they will diverge which > > will make them a pain to maintain. > > > > What do you think? > > My proposal for a simulation -> aecdump converter tool wouldn't repeat any of > the command-line flags from here. Essentially, you would provide it capture and > render files and some parameters about delay and distribution of calls to render > and capture and it would spit out an aecdump file. > > I think we should add support in audioproc_f for passing in capture and render > files and enabling AEC and intelligibility enhancement (IE) processing of them. 
> But it shouldn't accept delay, clock drift, order of calls etc. If you want that > fanciness, you should have to provide an aecdump.

Answers inline below:

You are correct that it can be changed later if needed, and I don't have a strong opinion about it, other than that I think it would be better not to separate out the for-loop. But let's keep it as it is.

I think your idea about the simulation generating tool is great. That approach makes sense for sure and if I understand it correctly there will be no overlap at all in command line options. Sounds super! Do I understand it correctly, that that tool would take care of any scenario-related options, such as drift, call order, sample rate, number of channels, etc., and that this tool should only allow setting parameters defining the operation of the APM, such as enable aec, set_suppression_level, etc.? With that approach, however, I think it would be better to remove the ability for audioproc_f to process wav-files. The main reason for that is that if we have that as well in this tool, we will need to ensure that the default call-chains in the audioproc_f and simulation generating tools are both kept the same. Furthermore, we need to ensure that they are kept the same as they are used in WebRTC/Chromium, which I think significantly increases the risk of any of them diverging. So anyone doing an API change in APM would need to update that in 4 places and I think there is a high risk that one of these test files will be missed. We should keep that risk as low as possible and therefore I think it would make sense to only have the wav-file support in one file. WDYT?

Andrew MacDonald 2015/10/22 05:11:55 Correct.

Correct.

WavFileProcessor has no "default call-chain". Check it out. It's one call to ProcessStream.

As above, in general, nothing should need to change in WavFileProcessor as new APIs are added.

Most components (e.g. beamforming, noise suppression) don't need any run-time information beyond the input WAV file. I would hate to require users (me) to run a second tool needlessly for this case.

peah-webrtc 2015/10/22 14:38:30 Ok, I yield :-). It is indeed as you point out a o

On 2015/10/22 05:11:55, Andrew MacDonald wrote: > On 2015/10/22 04:50:40, peah-webrtc wrote: > > On 2015/10/22 00:12:09, Andrew MacDonald wrote: > > > I'd prefer to leave it this way for now. It's easy to refactor into a > one-shot > > > method if it becomes clearly superior in the future. > > You are correct that it can be changed later if needed, and I don't have a > > strong opinion about it apart from that I think it would be better not to > > separate out the for-loop. But let's keep that. > > > > > My proposal for a simulation -> aecdump converter tool wouldn't repeat any > of > > > the command-line flags from here. Essentially, you would provide it capture > > and > > > render files and some parameters about delay and distribution of calls to > > render > > > and capture and it would spit out an aecdump file. > > > > > > I think we should add support in audioproc_f for passing in capture and > render > > > files and enabling AEC and intelligibility enhancement (IE) processing of > > them. > > > But it shouldn't accept delay, clock drift, order of calls etc. If you want > > that > > > fanciness, you should have to provide an aecdump. > > I think your idea about the simulation generating tool is great. That approach > > makes sense for sure and if I understand it correctly there will be no overlap > > at all in command line options. Sounds super! > > Do I understand it correctly, that that tool would take care of any scenario > > related options, such as drift, call order, sample rate, number of channels, > > etc, and that this tool only should allow setting parameters defining the > > operation of the APM, such as enable aec, set_suppression_level, etc? > > Correct. > > > > > With that approach, however, I think it would be better to remove the ability > > for audioproc_f to process wav-files. 
The main reason for that is that if we > > have that as well in this tool, we will need to ensure that the default > > call-chains in the audioproc_f and simulation generating tools are the both > kept > > the same. > > WavFileProcessor has no "default call-chain". Check it out. It's one call to > ProcessStream. > > > Furthermore, we need to ensure that they are kept the same as they are > > used in WebRTC/Chromium which I think significantly increases the risk of any > of > > them diverging. So anyone doing an API change in APM would need to update that > > in 4 places and I think there is a high risk that one of these test files will > > be missed. > > As above, in general, nothing should need to change in WavFileProcessor as new > APIs are added. > > > We should keep that risk as low as possible and therefore I think it would > make > > sense to only have the wav-file support in one file. WDYT? > > Most components (i.e. beamforming, noise suppression) don't need any run-time > information beyond the input WAV file. I would hate to require users (me) to run > a second tool needlessly for this case.

OK, I yield :-). It is indeed, as you point out, a one-liner, so let's keep it for that use case.

Andrew MacDonald 2015/10/22 16:38:20 :-)

trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond);

- FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(),

- &in_interleaved[0]);

- Deinterleave(&in_interleaved[0], in_buf.num_frames(),

- in_buf.num_channels(), in_buf.channels());

- if (process_reverse) {

- in_rev_file->ReadSamples(in_rev_interleaved.size(),

- in_rev_interleaved.data());

- FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(),

- in_rev_interleaved.data());

- Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(),

- in_rev_buf->num_channels(), in_rev_buf->channels());

- }

- if (FLAGS_perf) {

- processing_start_time = TickTime::Now();

- }

- RTC_CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config,

- output_config, out_buf.channels()));

- if (process_reverse) {

- RTC_CHECK_EQ(kNoErr, ap->ProcessReverseStream(

- in_rev_buf->channels(), reverse_input_config,

- reverse_output_config, out_rev_buf->channels()));

- }

- if (FLAGS_perf) {

- accumulated_time += TickTime::Now() - processing_start_time;

- }

- Interleave(out_buf.channels(), out_buf.num_frames(),

- out_buf.num_channels(), &out_interleaved[0]);

- FloatToFloatS16(&out_interleaved[0], out_interleaved.size(),

- &out_interleaved[0]);

- out_file.WriteSamples(&out_interleaved[0], out_interleaved.size());

- if (process_reverse) {

- Interleave(out_rev_buf->channels(), out_rev_buf->num_frames(),

- out_rev_buf->num_channels(), out_rev_interleaved.data());

- FloatToFloatS16(out_rev_interleaved.data(), out_rev_interleaved.size(),

- out_rev_interleaved.data());

- out_rev_file->WriteSamples(out_rev_interleaved.data(),

- out_rev_interleaved.size());

- }

- num_chunks++;

+ ++num_chunks;

}

if (FLAGS_perf) {

- int64_t execution_time_ms = accumulated_time.Milliseconds();

- printf("\nExecution time: %.3f s\nFile time: %.2f s\n"

- "Time per chunk: %.3f ms\n",

- execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond,

- execution_time_ms * 1.f / num_chunks);

+ int64_t execution_time_ms = processor->processing_time_ms();

peah-webrtc 2015/10/20 21:17:25 It would be nice also to have the max and min comp

Andrew MacDonald 2015/10/21 00:29:28 Good point, done. I typically don't think the max

peah-webrtc 2015/10/21 08:10:04 That makes sense. If we can get a histogram, that

+ printf(

+ "\nExecution time: %.3f s\nFile time: %.2f s\n"

+ "Time per chunk: %.3f ms\n",

+ execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond,

+ execution_time_ms * 1.f / num_chunks);

}

return 0;

}