webrtc/modules/audio_processing/test/audioproc_float.cc - Issue 1234463003: Integrate Intelligibility with APM

Side by Side Diff: webrtc/modules/audio_processing/test/audioproc_float.cc

Issue 1234463003: Integrate Intelligibility with APM (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Addressed comments from Patch Set 7 Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/include/audio_processing.h ('K') | « webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc ('k') | webrtc/voice_engine/output_mixer.h » ('j') | webrtc/voice_engine/output_mixer.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <stdio.h>	11 #include <stdio.h>

12 #include <sstream>	12 #include <sstream>

13 #include <string>	13 #include <string>

14	14

15 #include "gflags/gflags.h"	15 #include "gflags/gflags.h"

16 #include "webrtc/base/checks.h"	16 #include "webrtc/base/checks.h"

17 #include "webrtc/base/scoped_ptr.h"	17 #include "webrtc/base/scoped_ptr.h"

18 #include "webrtc/common_audio/channel_buffer.h"	18 #include "webrtc/common_audio/channel_buffer.h"

19 #include "webrtc/common_audio/wav_file.h"	19 #include "webrtc/common_audio/wav_file.h"

20 #include "webrtc/modules/audio_processing/include/audio_processing.h"	20 #include "webrtc/modules/audio_processing/include/audio_processing.h"

21 #include "webrtc/modules/audio_processing/test/protobuf_utils.h"	21 #include "webrtc/modules/audio_processing/test/protobuf_utils.h"

22 #include "webrtc/modules/audio_processing/test/test_utils.h"	22 #include "webrtc/modules/audio_processing/test/test_utils.h"

23 #include "webrtc/system_wrappers/interface/tick_util.h"	23 #include "webrtc/system_wrappers/interface/tick_util.h"

24 #include "webrtc/test/testsupport/trace_to_stderr.h"	24 #include "webrtc/test/testsupport/trace_to_stderr.h"

25	25

26 DEFINE_string(dump, "", "The name of the debug dump file to read from.");	26 DEFINE_string(dump, "", "The name of the debug dump file to read from.");

27 DEFINE_string(i, "", "The name of the input file to read from.");	27 DEFINE_string(i, "", "The name of the input file to read from.");

	28 DEFINE_string(i_rev, "", "The name of the reverse input file to read from.");

28 DEFINE_string(o, "out.wav", "Name of the output file to write to.");	29 DEFINE_string(o, "out.wav", "Name of the output file to write to.");

	30 DEFINE_string(o_rev,

	31 "out_rev.wav",

	32 "Name of the reverse output file to write to.");

29 DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input.");	33 DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input.");

30 DEFINE_int32(out_sample_rate, 0,	34 DEFINE_int32(out_sample_rate, 0,

31 "Output sample rate in Hz. Defaults to input.");	35 "Output sample rate in Hz. Defaults to input.");

32 DEFINE_string(mic_positions, "",	36 DEFINE_string(mic_positions, "",

33 "Space delimited cartesian coordinates of microphones in meters. "	37 "Space delimited cartesian coordinates of microphones in meters. "

34 "The coordinates of each point are contiguous. "	38 "The coordinates of each point are contiguous. "

35 "For a two element array: \"x1 y1 z1 x2 y2 z2\"");	39 "For a two element array: \"x1 y1 z1 x2 y2 z2\"");

36	40

37 DEFINE_bool(aec, false, "Enable echo cancellation.");	41 DEFINE_bool(aec, false, "Enable echo cancellation.");

38 DEFINE_bool(agc, false, "Enable automatic gain control.");	42 DEFINE_bool(agc, false, "Enable automatic gain control.");

39 DEFINE_bool(hpf, false, "Enable high-pass filtering.");	43 DEFINE_bool(hpf, false, "Enable high-pass filtering.");

40 DEFINE_bool(ns, false, "Enable noise suppression.");	44 DEFINE_bool(ns, false, "Enable noise suppression.");

41 DEFINE_bool(ts, false, "Enable transient suppression.");	45 DEFINE_bool(ts, false, "Enable transient suppression.");

42 DEFINE_bool(bf, false, "Enable beamforming.");	46 DEFINE_bool(bf, false, "Enable beamforming.");

	47 DEFINE_bool(ie, false, "Enable intelligibility enhancer.");

43 DEFINE_bool(all, false, "Enable all components.");	48 DEFINE_bool(all, false, "Enable all components.");

44	49

45 DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3].");	50 DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3].");

46	51

47 DEFINE_bool(perf, false, "Enable performance tests.");	52 DEFINE_bool(perf, false, "Enable performance tests.");

48	53

49 namespace webrtc {	54 namespace webrtc {

50 namespace {	55 namespace {

51	56

52 const int kChunksPerSecond = 100;	57 const int kChunksPerSecond = 100;

(...skipping 25 matching lines...) Expand all Loading...
78 WavReader in_file(FLAGS_i);	83 WavReader in_file(FLAGS_i);

79 // If the output format is uninitialized, use the input format.	84 // If the output format is uninitialized, use the input format.

80 const int out_channels =	85 const int out_channels =

81 FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels();	86 FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels();

82 const int out_sample_rate =	87 const int out_sample_rate =

83 FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate();	88 FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate();

84 WavWriter out_file(FLAGS_o, out_sample_rate, out_channels);	89 WavWriter out_file(FLAGS_o, out_sample_rate, out_channels);

85	90

86 Config config;	91 Config config;

87 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));	92 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));

	93 config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all));

88	94

89 if (FLAGS_bf || FLAGS_all) {	95 if (FLAGS_bf || FLAGS_all) {

90 const size_t num_mics = in_file.num_channels();	96 const size_t num_mics = in_file.num_channels();

91 const std::vector<Point> array_geometry =	97 const std::vector<Point> array_geometry =

92 ParseArrayGeometry(FLAGS_mic_positions, num_mics);	98 ParseArrayGeometry(FLAGS_mic_positions, num_mics);

93 CHECK_EQ(array_geometry.size(), num_mics);	99 CHECK_EQ(array_geometry.size(), num_mics);

94	100

95 config.Set<Beamforming>(new Beamforming(true, array_geometry));	101 config.Set<Beamforming>(new Beamforming(true, array_geometry));

96 }	102 }

97	103

98 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));	104 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));

99 if (!FLAGS_dump.empty()) {	105 if (!FLAGS_dump.empty()) {

100 CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all));	106 CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all));

101 } else if (FLAGS_aec) {	107 } else if (FLAGS_aec) {

102 fprintf(stderr, "-aec requires a -dump file.\n");	108 fprintf(stderr, "-aec requires a -dump file.\n");

103 return -1;	109 return -1;

104 }	110 }

	111 bool process_reverse = !FLAGS_i_rev.empty();

105 CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all));	112 CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all));

106 CHECK_EQ(kNoErr, ap->gain_control()->set_mode(GainControl::kFixedDigital));	113 CHECK_EQ(kNoErr, ap->gain_control()->set_mode(GainControl::kFixedDigital));

107 CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all));	114 CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all));

108 CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all));	115 CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all));

109 if (FLAGS_ns_level != -1)	116 if (FLAGS_ns_level != -1)

110 CHECK_EQ(kNoErr, ap->noise_suppression()->set_level(	117 CHECK_EQ(kNoErr, ap->noise_suppression()->set_level(

111 static_cast<NoiseSuppression::Level>(FLAGS_ns_level)));	118 static_cast<NoiseSuppression::Level>(FLAGS_ns_level)));

112	119

113 printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n",	120 printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

114 FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate());	121 FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate());

115 printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n",	122 printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

116 FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());	123 FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());

117	124

118 ChannelBuffer<float> in_buf(	125 ChannelBuffer<float> in_buf(

119 rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),	126 rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),

120 in_file.num_channels());	127 in_file.num_channels());

121 ChannelBuffer<float> out_buf(	128 ChannelBuffer<float> out_buf(

122 rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),	129 rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),

123 out_file.num_channels());	130 out_file.num_channels());

124	131

125 std::vector<float> in_interleaved(in_buf.size());	132 std::vector<float> in_interleaved(in_buf.size());

126 std::vector<float> out_interleaved(out_buf.size());	133 std::vector<float> out_interleaved(out_buf.size());

	134

	135 rtc::scoped_ptr<WavReader> in_rev_file;

	136 rtc::scoped_ptr<WavWriter> out_rev_file;

	137 rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf;

	138 std::vector<float> in_rev_interleaved;

	139 if (process_reverse) {

	140 in_rev_file.reset(new WavReader(FLAGS_i_rev));

	141 out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(),

	142 in_rev_file->num_channels()));

	143 printf("In rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

	144 FLAGS_i_rev.c_str(), in_rev_file->num_channels(),

	145 in_rev_file->sample_rate());

	146 printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n",

	147 FLAGS_o_rev.c_str(), out_rev_file->num_channels(),

	148 out_rev_file->sample_rate());

	149 in_rev_buf.reset(new ChannelBuffer<float>(

	150 rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond),

	151 in_rev_file->num_channels()));

	152 in_rev_interleaved.resize(in_rev_buf->size());

	153 }

	154

127 TickTime processing_start_time;	155 TickTime processing_start_time;

128 TickInterval accumulated_time;	156 TickInterval accumulated_time;

129 int num_chunks = 0;	157 int num_chunks = 0;

130	158

131 const StreamConfig input_config = {	159 const StreamConfig input_config = {

132 in_file.sample_rate(), in_buf.num_channels(),	160 in_file.sample_rate(), in_buf.num_channels(),

133 };	161 };

134 const StreamConfig output_config = {	162 const StreamConfig output_config = {

135 out_file.sample_rate(), out_buf.num_channels(),	163 out_file.sample_rate(), out_buf.num_channels(),

136 };	164 };

137 while (in_file.ReadSamples(in_interleaved.size(),	165 while (in_file.ReadSamples(in_interleaved.size(),

138 &in_interleaved[0]) == in_interleaved.size()) {	166 &in_interleaved[0]) == in_interleaved.size()) {

139 // Have logs display the file time rather than wallclock time.	167 // Have logs display the file time rather than wallclock time.

140 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond);	168 trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond);

141 FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(),	169 FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(),

142 &in_interleaved[0]);	170 &in_interleaved[0]);

143 Deinterleave(&in_interleaved[0], in_buf.num_frames(),	171 Deinterleave(&in_interleaved[0], in_buf.num_frames(),

144 in_buf.num_channels(), in_buf.channels());	172 in_buf.num_channels(), in_buf.channels());

	173 if (process_reverse) {

	174 in_rev_file->ReadSamples(in_rev_interleaved.size(),

	175 in_rev_interleaved.data());

	176 FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(),

	177 in_rev_interleaved.data());

	178 Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(),

	179 in_rev_buf->num_channels(), in_rev_buf->channels());

	180 }

145	181

146 if (FLAGS_perf) {	182 if (FLAGS_perf) {

147 processing_start_time = TickTime::Now();	183 processing_start_time = TickTime::Now();

148 }	184 }

149 CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config,	185 CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config,

150 output_config, out_buf.channels()));	186 output_config, out_buf.channels()));

	187 if (process_reverse) {

	188 CHECK_EQ(kNoErr, ap->ProcessReverseStream(

	189 in_rev_buf->channels(), in_rev_buf->num_frames(),

	190 in_rev_file->sample_rate(),

	191 LayoutFromChannels(in_rev_buf->num_channels())));

	192 }

151 if (FLAGS_perf) {	193 if (FLAGS_perf) {

152 accumulated_time += TickTime::Now() - processing_start_time;	194 accumulated_time += TickTime::Now() - processing_start_time;

153 }	195 }

154	196

155 Interleave(out_buf.channels(), out_buf.num_frames(),	197 Interleave(out_buf.channels(), out_buf.num_frames(),

156 out_buf.num_channels(), &out_interleaved[0]);	198 out_buf.num_channels(), &out_interleaved[0]);

157 FloatToFloatS16(&out_interleaved[0], out_interleaved.size(),	199 FloatToFloatS16(&out_interleaved[0], out_interleaved.size(),

158 &out_interleaved[0]);	200 &out_interleaved[0]);

159 out_file.WriteSamples(&out_interleaved[0], out_interleaved.size());	201 out_file.WriteSamples(&out_interleaved[0], out_interleaved.size());

	202 if (process_reverse) {

	203 Interleave(in_rev_buf->channels(), in_rev_buf->num_frames(),

	204 in_rev_buf->num_channels(), in_rev_interleaved.data());

	205 FloatToFloatS16(in_rev_interleaved.data(), in_rev_interleaved.size(),

	206 in_rev_interleaved.data());

	207 out_rev_file->WriteSamples(in_rev_interleaved.data(),

	208 in_rev_interleaved.size());

	209 }

160 num_chunks++;	210 num_chunks++;

161 }	211 }

162 if (FLAGS_perf) {	212 if (FLAGS_perf) {

163 int64_t execution_time_ms = accumulated_time.Milliseconds();	213 int64_t execution_time_ms = accumulated_time.Milliseconds();

164 printf("\nExecution time: %.3f s\nFile time: %.2f s\n"	214 printf("\nExecution time: %.3f s\nFile time: %.2f s\n"

165 "Time per chunk: %.3f ms\n",	215 "Time per chunk: %.3f ms\n",

166 execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond,	216 execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond,

167 execution_time_ms * 1.f / num_chunks);	217 execution_time_ms * 1.f / num_chunks);

168 }	218 }

169 return 0;	219 return 0;

170 }	220 }

171	221

172 } // namespace webrtc	222 } // namespace webrtc

173	223

174 int main(int argc, char* argv[]) {	224 int main(int argc, char* argv[]) {

175 return webrtc::main(argc, argv);	225 return webrtc::main(argc, argv);

176 }	226 }

OLD	NEW