webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc - Issue 1182323005: Allow intelligibility to compile in apm

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc

Issue 1182323005: Allow intelligibility to compile in apm (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Use size_t to address win_x64_rel warning errors Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <arpa/inet.h>	11 //

12 #include <fcntl.h>	12 // Command line tool for speech intelligibility enhancement. Provides for

	13 // running and testing intelligibility_enhancer as an independent process.

	14 // Use --help for options.

	15 //

	16

13 #include <stdint.h>	17 #include <stdint.h>

14 #include <stdio.h>

15 #include <stdlib.h>	18 #include <stdlib.h>

16 #include <sys/mman.h>	19 #include <string>

17 #include <sys/stat.h>	20 #include <sys/stat.h>

18 #include <sys/types.h>	21 #include <sys/types.h>

19 #include <unistd.h>

20

21 #include <fenv.h>

22 #include <limits>

23

24 #include <complex>

25	22

26 #include "gflags/gflags.h"	23 #include "gflags/gflags.h"

	24 #include "testing/gtest/include/gtest/gtest.h"

27 #include "webrtc/base/checks.h"	25 #include "webrtc/base/checks.h"

28 #include "webrtc/common_audio/real_fourier.h"	26 #include "webrtc/common_audio/real_fourier.h"

	27 #include "webrtc/common_audio/wav_file.h"

29 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	28 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

30 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	29 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

31 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"	30 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"

32 #include "webrtc/system_wrappers/interface/scoped_ptr.h"

33	31

	32 // PCM data simulating streams

34 const int16_t* in_ipcm;	33 const int16_t* in_ipcm;
	Andrew MacDonald 2015/06/18 03:38:15 I don't think any of these from here down to line I don't think any of these from here down to line 43 need to be globals, right? ekm 2015/06/18 18:50:27 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > I don't think any of these from here down to line 43 need to be globals, right? Done.
35 int16_t* out_ipcm;

36 const int16_t* noise_ipcm;

37

38 float* in_fpcm;	34 float* in_fpcm;

39 float* out_fpcm;	35 float* out_fpcm;

40 float* noise_fpcm;	36 float* noise_fpcm;

	37

	38 // Current locations in streams

41 float* noise_cursor;	39 float* noise_cursor;

42 float* clear_cursor;	40 float* clear_cursor;

43	41

44 int samples;	42 size_t samples; // Number of samples in input PCM file

45 int fragment_size;	43 size_t fragment_size; // Number of samples to process at a time

	44 // to simulate APM stream processing

46	45

47 using std::complex;	46 using std::complex;

	47

	48 namespace webrtc {

	49

48 using webrtc::RealFourier;	50 using webrtc::RealFourier;

49 using webrtc::IntelligibilityEnhancer;	51 using webrtc::IntelligibilityEnhancer;

50	52

51 DEFINE_int32(clear_type, webrtc::intelligibility::VarianceArray::kStepInfinite,	53 DEFINE_int32(clear_type,

	54 webrtc::intelligibility::VarianceArray::kStepInfinite,

52 "Variance algorithm for clear data.");	55 "Variance algorithm for clear data.");

53 DEFINE_double(clear_alpha, 0.9,	56 DEFINE_double(clear_alpha, 0.9, "Variance decay factor for clear data.");

54 "Variance decay factor for clear data.");	57 DEFINE_int32(clear_window,

55 DEFINE_int32(clear_window, 475,	58 475,

56 "Window size for windowed variance for clear data.");	59 "Window size for windowed variance for clear data.");

57 DEFINE_int32(sample_rate, 16000,	60 DEFINE_int32(sample_rate,

	61 16000,

58 "Audio sample rate used in the input and output files.");	62 "Audio sample rate used in the input and output files.");

59 DEFINE_int32(ana_rate, 800,	63 DEFINE_int32(ana_rate,

	64 800,

60 "Analysis rate; gains recalculated every N blocks.");	65 "Analysis rate; gains recalculated every N blocks.");

61 DEFINE_int32(var_rate, 2,	66 DEFINE_int32(

62 "Variance clear rate; history is forgotten every N gain recalculations.");	67 var_rate,

	68 2,

	69 "Variance clear rate; history is forgotten every N gain recalculations.");

63 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");	70 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");

64	71

65 DEFINE_bool(repeat, false, "Repeat input file ad nauseam.");	72 DEFINE_bool(repeat, false, "Repeat input file ad nauseam.");

66	73

67 DEFINE_string(clear_file, "speech.pcm", "Input file with clear speech.");	74 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");

68 DEFINE_string(noise_file, "noise.pcm", "Input file with noise data.");	75 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");

69 DEFINE_string(out_file, "proc_enhanced.pcm", "Enhanced output. Use '-' to "	76 DEFINE_string(out_file,

70 "pipe through aplay internally.");	77 "proc_enhanced.wav",

	78 "Enhanced output. Use '-' to "

	79 "play through aplay immediately.");

71	80

72 // Write an Sun AU-formatted audio chunk into file descriptor |fd|. Can be used	81 // Constant IntelligibilityEnhancer constructor parameters.

73 // to pipe the audio stream directly into aplay.	82 const int kErbResolution = 2;

74 void writeau(int fd) {	83 const int kNumChannels = 1;

75 uint32_t thing;

76	84

77 write(fd, ".snd", 4);	85 const std::string kTmpOutName = "tmp_out_file.wav";
	Andrew MacDonald 2015/06/18 03:38:15 Prefer not to use non-POD types as static globals. Prefer not to use non-POD types as static globals. Either make this a const char* or move it into void_main. ekm 2015/06/18 18:50:26 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > Prefer not to use non-POD types as static globals. Either make this a const > char* or move it into void_main. Done.
78 thing = htonl(24);

79 write(fd, &thing, sizeof(thing));

80 thing = htonl(0xffffffff);

81 write(fd, &thing, sizeof(thing));

82 thing = htonl(3);

83 write(fd, &thing, sizeof(thing));

84 thing = htonl(FLAGS_sample_rate);

85 write(fd, &thing, sizeof(thing));

86 thing = htonl(1);

87 write(fd, &thing, sizeof(thing));

88	86

89 for (int i = 0; i < samples; ++i) {	87 // void function for gtest

90 out_ipcm[i] = htons(out_ipcm[i]);	88 void void_main(int argc, char* argv[]) {

91 }	89 google::SetUsageMessage(

92 write(fd, out_ipcm, sizeof(*out_ipcm) * samples);	90 "\n\nVariance algorithm types are:\n"

93 }	91 " 0 - infinite/normal,\n"

94	92 " 1 - exponentially decaying,\n"

95 int main(int argc, char* argv[]) {	93 " 2 - rolling window.\n"

96 google::SetUsageMessage("\n\nVariance algorithm types are:\n"	94 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

97 " 0 - infinite/normal,\n"

98 " 1 - exponentially decaying,\n"

99 " 2 - rolling window.\n"

100 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

101 google::ParseCommandLineFlags(&argc, &argv, true);	95 google::ParseCommandLineFlags(&argc, &argv, true);

102	96

103 const char* in_name = FLAGS_clear_file.c_str();	97 const char* in_name = FLAGS_clear_file.c_str();

104 const char* out_name = FLAGS_out_file.c_str();	98 const char* out_name = FLAGS_out_file.c_str();

105 const char* noise_name = FLAGS_noise_file.c_str();	99 const char* noise_name = FLAGS_noise_file.c_str();

106 struct stat in_stat, noise_stat;	100 struct stat in_stat, noise_stat;

107 int in_fd, out_fd, noise_fd;	101 std::string out_file_name;

108 FILE* aplay_file = nullptr;	102 std::string in_file_name = in_name;

	103 std::string noise_file_name = noise_name;
	Andrew MacDonald 2015/06/18 03:38:15 You have three variables referring to each of the You have three variables referring to each of the filenames. I think you can get away with just using the FLAGS variant everywhere. ekm 2015/06/18 18:50:26 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > You have three variables referring to each of the filenames. I think you can get > away with just using the FLAGS variant everywhere. Done.
109	104

110 fragment_size = FLAGS_sample_rate / 100;	105 // Load settings and wav input.

111	106

112 stat(in_name, &in_stat);	107 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.

113 stat(noise_name, &noise_stat);	108 // Duplicates chunk_length_ in

	109 // IntelligibilityEnhancer.

	110

	111 ASSERT_EQ(stat(in_name, &in_stat), 0) << "Empty speech input.";
	Andrew MacDonald 2015/06/18 03:38:15 Not sure these are really needed. How often will y Not sure these are really needed. How often will you have any empty file? ekm 2015/06/18 18:50:27 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > Not sure these are really needed. How often will you have any empty file? Done.
	112 ASSERT_EQ(stat(noise_name, &noise_stat), 0) << "Empty noise input.";

	113

114 samples = in_stat.st_size / sizeof(*in_ipcm);	114 samples = in_stat.st_size / sizeof(*in_ipcm);

115	115

116 in_fd = open(in_name, O_RDONLY);

117 if (!strcmp(out_name, "-")) {	116 if (!strcmp(out_name, "-")) {

118 aplay_file = popen("aplay -t au", "w");	117 out_file_name = kTmpOutName;

119 out_fd = fileno(aplay_file);

120 } else {	118 } else {

121 out_fd = open(out_name, O_WRONLY | O_CREAT | O_TRUNC,	119 out_file_name = out_name;

122 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);

123 }

124 noise_fd = open(noise_name, O_RDONLY);

125

126 in_ipcm = static_cast<int16_t*>(mmap(nullptr, in_stat.st_size, PROT_READ,

127 MAP_PRIVATE, in_fd, 0));

128 noise_ipcm = static_cast<int16_t*>(mmap(nullptr, noise_stat.st_size,

129 PROT_READ, MAP_PRIVATE, noise_fd, 0));

130 out_ipcm = new int16_t[samples];

131 out_fpcm = new float[samples];

132 in_fpcm = new float[samples];

133 noise_fpcm = new float[samples];

134

135 for (int i = 0; i < samples; ++i) {

136 noise_fpcm[i] = noise_ipcm[i % (noise_stat.st_size / sizeof(*noise_ipcm))];

137 }	120 }

138	121

139 //feenableexcept(FE_INVALID | FE_OVERFLOW);	122 WavReader in_file(in_file_name);

140 IntelligibilityEnhancer enh(2,	123 in_fpcm = new float[samples];
	Andrew MacDonald 2015/06/18 03:38:15 vector or at least scoped_ptr. Doesn't matter if w vector or at least scoped_ptr. Doesn't matter if we leak here of course, but let's practice good habits ;) ekm 2015/06/18 18:50:26 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > vector or at least scoped_ptr. Doesn't matter if we leak here of course, but > let's practice good habits ;) Done.
141 FLAGS_sample_rate, 1,	124 const size_t samples_read = in_file.ReadSamples(samples, in_fpcm);

142 FLAGS_clear_type,	125

143 static_cast<float>(FLAGS_clear_alpha),	126 WavReader noise_file(noise_file_name);

144 FLAGS_clear_window,	127 noise_fpcm = new float[samples];
	Andrew MacDonald 2015/06/18 03:38:15 vector vector ekm 2015/06/18 18:50:27 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > vector Done.
145 FLAGS_ana_rate,	128 const size_t noise_samples = noise_file.ReadSamples(samples_read, noise_fpcm);

146 FLAGS_var_rate,	129 for (size_t i = noise_samples; i < samples_read; ++i) {

147 FLAGS_gain_limit);	130 noise_fpcm[i] = noise_fpcm[i % noise_samples];
	Andrew MacDonald 2015/06/18 03:38:15 This is a bit weird. You're restarting the noise f This is a bit weird. You're restarting the noise file, which may not be a great idea, depending on the content. I think better to crop to the shorter of the two files. ekm 2015/06/18 18:50:27 For iid noise it's fine, but I see with real world Show quoted text On 2015/06/18 03:38:15, andrew wrote: > This is a bit weird. You're restarting the noise file, which may not be a great > idea, depending on the content. > > I think better to crop to the shorter of the two files. For iid noise it's fine, but I see with real world noise things could get weird. Done.
	131 }

	132

	133 // Run intelligibility enhancement.

	134

	135 IntelligibilityEnhancer enh(

	136 kErbResolution,

	137 FLAGS_sample_rate,

	138 kNumChannels,

	139 FLAGS_clear_type, static_cast<float>(FLAGS_clear_alpha),

	140 FLAGS_clear_window, FLAGS_ana_rate, FLAGS_var_rate, FLAGS_gain_limit);

148	141

149 // Slice the input into smaller chunks, as the APM would do, and feed them	142 // Slice the input into smaller chunks, as the APM would do, and feed them

150 // into the enhancer. Repeat indefinitely if FLAGS_repeat is set.	143 // through the enhancer. Repeat indefinitely if FLAGS_repeat is set.

151 do {	144 do {

	145 clear_cursor = in_fpcm;

152 noise_cursor = noise_fpcm;	146 noise_cursor = noise_fpcm;

153 clear_cursor = in_fpcm;

154 for (int i = 0; i < samples; ++i) {

155 in_fpcm[i] = in_ipcm[i];

156 }

157	147

158 for (int i = 0; i < samples; i += fragment_size) {	148 for (size_t i = 0; i < samples; i += fragment_size) {
	Andrew MacDonald 2015/06/18 03:38:15 I think it's more typical to read directly from th I think it's more typical to read directly from the file in chunks, but this is OK. ekm 2015/06/18 18:50:26 Acknowledged. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > I think it's more typical to read directly from the file in chunks, but this is > OK. Acknowledged.
159 enh.ProcessCaptureAudio(&noise_cursor);	149 enh.ProcessCaptureAudio(&noise_cursor);

160 enh.ProcessRenderAudio(&clear_cursor);	150 enh.ProcessRenderAudio(&clear_cursor);

161 clear_cursor += fragment_size;	151 clear_cursor += fragment_size;

162 noise_cursor += fragment_size;	152 noise_cursor += fragment_size;

163 }	153 }

164	154

165 for (int i = 0; i < samples; ++i) {	155 {

166 out_ipcm[i] = static_cast<float>(in_fpcm[i]);	156 WavWriter out_file(out_file_name, FLAGS_sample_rate, kNumChannels);
	Andrew MacDonald 2015/06/18 03:38:15 Indent inside scopes. Indent inside scopes. ekm 2015/06/18 18:50:26 Done. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > Indent inside scopes. Done.
	157 out_file.WriteSamples(in_fpcm, samples);

167 }	158 }

	159

168 if (!strcmp(out_name, "-")) {	160 if (!strcmp(out_name, "-")) {

169 writeau(out_fd);	161 system(("aplay " + out_file_name).c_str());

170 } else {	162 system(("rm " + out_file_name).c_str());

171 write(out_fd, out_ipcm, samples * sizeof(*out_ipcm));

172 }	163 }

	164

173 } while (FLAGS_repeat);	165 } while (FLAGS_repeat);
	Andrew MacDonald 2015/06/18 03:38:15 This probably made sense with the piping directly This probably made sense with the piping directly to aplay. But now that you write the whole thing to file then read, you're going to have a gap between the repeats. I think better to just remove this option. Not sure this is worth it, but if you want the same behavior, you could open a fifo: http://linux.die.net/man/3/mkfifo Write to it using WavWriter and read from it using aplay. ekm 2015/06/18 18:50:26 Done. Removed. I don't think it's worth it. Show quoted text On 2015/06/18 03:38:15, andrew wrote: > This probably made sense with the piping directly to aplay. But now that you > write the whole thing to file then read, you're going to have a gap between the > repeats. I think better to just remove this option. > > Not sure this is worth it, but if you want the same behavior, you could open a > fifo: > http://linux.die.net/man/3/mkfifo > > Write to it using WavWriter and read from it using aplay. Done. Removed. I don't think it's worth it.
174	166

175 munmap(const_cast<int16_t*>(noise_ipcm), noise_stat.st_size);	167 }

176 munmap(const_cast<int16_t*>(in_ipcm), in_stat.st_size);

177 close(noise_fd);

178 if (aplay_file) {

179 pclose(aplay_file);

180 } else {

181 close(out_fd);

182 }

183 close(in_fd);

184	168

	169 } // namespace webrtc

	170

	171 int main(int argc, char* argv[]) {

	172 webrtc::void_main(argc, argv);

185 return 0;	173 return 0;

186 }	174 }

187

OLD	NEW