webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc - Issue 1187033005: Revert of Allow intelligibility to compile in apm

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc

Issue 1187033005: Revert of Allow intelligibility to compile in apm (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Command line tool for speech intelligibility enhancement. Provides for

13 // running and testing intelligibility_enhancer as an independent process.

14 // Use --help for options.

15 //

16

17 #include <arpa/inet.h>	11 #include <arpa/inet.h>

18 #include <fcntl.h>	12 #include <fcntl.h>

19 #include <stdint.h>	13 #include <stdint.h>

20 #include <stdio.h>	14 #include <stdio.h>

21 #include <stdlib.h>	15 #include <stdlib.h>

22 #include <sys/mman.h>	16 #include <sys/mman.h>

23 #include <sys/stat.h>	17 #include <sys/stat.h>

24 #include <sys/types.h>	18 #include <sys/types.h>

25 #include <unistd.h>	19 #include <unistd.h>

26	20

27 #include <fenv.h>	21 #include <fenv.h>

28 #include <limits>	22 #include <limits>

29	23

30 #include <complex>	24 #include <complex>

31	25

32 #include "gflags/gflags.h"	26 #include "gflags/gflags.h"

33 #include "testing/gtest/include/gtest/gtest.h"

34 #include "webrtc/base/checks.h"	27 #include "webrtc/base/checks.h"

35 #include "webrtc/common_audio/real_fourier.h"	28 #include "webrtc/common_audio/real_fourier.h"

36 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	29 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

37 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	30 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

38 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"	31 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"

	32 #include "webrtc/system_wrappers/interface/scoped_ptr.h"

39	33

40 // PCM data simulating streams

41 const int16_t* in_ipcm;	34 const int16_t* in_ipcm;

42 int16_t* out_ipcm;	35 int16_t* out_ipcm;

43 const int16_t* noise_ipcm;	36 const int16_t* noise_ipcm;

	37

44 float* in_fpcm;	38 float* in_fpcm;

45 float* out_fpcm;	39 float* out_fpcm;

46 float* noise_fpcm;	40 float* noise_fpcm;

47

48 // Current locations in streams

49 float* noise_cursor;	41 float* noise_cursor;

50 float* clear_cursor;	42 float* clear_cursor;

51	43

52 int samples; // Number of samples in input PCM file	44 int samples;

53 int fragment_size; // Number of samples to process at a time	45 int fragment_size;

54 // to simulate APM stream processing

55	46

56 using std::complex;	47 using std::complex;

57

58 namespace webrtc {

59

60 using webrtc::RealFourier;	48 using webrtc::RealFourier;

61 using webrtc::IntelligibilityEnhancer;	49 using webrtc::IntelligibilityEnhancer;

62	50

63 DEFINE_int32(clear_type,	51 DEFINE_int32(clear_type, webrtc::intelligibility::VarianceArray::kStepInfinite,

64 webrtc::intelligibility::VarianceArray::kStepInfinite,

65 "Variance algorithm for clear data.");	52 "Variance algorithm for clear data.");

66 DEFINE_double(clear_alpha, 0.9, "Variance decay factor for clear data.");	53 DEFINE_double(clear_alpha, 0.9,

67 DEFINE_int32(clear_window,	54 "Variance decay factor for clear data.");

68 475,	55 DEFINE_int32(clear_window, 475,

69 "Window size for windowed variance for clear data.");	56 "Window size for windowed variance for clear data.");

70 DEFINE_int32(sample_rate,	57 DEFINE_int32(sample_rate, 16000,

71 16000,

72 "Audio sample rate used in the input and output files.");	58 "Audio sample rate used in the input and output files.");

73 DEFINE_int32(ana_rate,	59 DEFINE_int32(ana_rate, 800,

74 800,

75 "Analysis rate; gains recalculated every N blocks.");	60 "Analysis rate; gains recalculated every N blocks.");

76 DEFINE_int32(	61 DEFINE_int32(var_rate, 2,

77 var_rate,	62 "Variance clear rate; history is forgotten every N gain recalculations.");

78 2,

79 "Variance clear rate; history is forgotten every N gain recalculations.");

80 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");	63 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");

81	64

82 DEFINE_bool(repeat, false, "Repeat input file ad nauseam.");	65 DEFINE_bool(repeat, false, "Repeat input file ad nauseam.");

83	66

84 DEFINE_string(clear_file, "speech.pcm", "Input file with clear speech.");	67 DEFINE_string(clear_file, "speech.pcm", "Input file with clear speech.");

85 DEFINE_string(noise_file, "noise.pcm", "Input file with noise data.");	68 DEFINE_string(noise_file, "noise.pcm", "Input file with noise data.");

86 DEFINE_string(out_file,	69 DEFINE_string(out_file, "proc_enhanced.pcm", "Enhanced output. Use '-' to "

87 "proc_enhanced.pcm",

88 "Enhanced output. Use '-' to "

89 "pipe through aplay internally.");	70 "pipe through aplay internally.");

90	71

91 // Constant IntelligibilityEnhancer constructor parameters.	72 // Write an Sun AU-formatted audio chunk into file descriptor |fd|. Can be used

92 const int kErbResolution = 2;	73 // to pipe the audio stream directly into aplay.

93 const int kNumChannels = 1;

94

95 // Converts output stream to Sun AU format and writes to file descriptor |fd|.

96 // Can be used to pipe output directly into aplay.

97 // TODO(ekmeyerson): Modify to write WAV instead.

98 void writeau(int fd) {	74 void writeau(int fd) {

99 uint32_t thing;	75 uint32_t thing;

100	76

101 write(fd, ".snd", 4);	77 write(fd, ".snd", 4);

102 thing = htonl(24);	78 thing = htonl(24);

103 write(fd, &thing, sizeof(thing));	79 write(fd, &thing, sizeof(thing));

104 thing = htonl(0xffffffff);	80 thing = htonl(0xffffffff);

105 write(fd, &thing, sizeof(thing));	81 write(fd, &thing, sizeof(thing));

106 thing = htonl(3);	82 thing = htonl(3);

107 write(fd, &thing, sizeof(thing));	83 write(fd, &thing, sizeof(thing));

108 thing = htonl(FLAGS_sample_rate);	84 thing = htonl(FLAGS_sample_rate);

109 write(fd, &thing, sizeof(thing));	85 write(fd, &thing, sizeof(thing));

110 thing = htonl(1);	86 thing = htonl(1);

111 write(fd, &thing, sizeof(thing));	87 write(fd, &thing, sizeof(thing));

112	88

113 for (int i = 0; i < samples; ++i) {	89 for (int i = 0; i < samples; ++i) {

114 out_ipcm[i] = htons(out_ipcm[i]);	90 out_ipcm[i] = htons(out_ipcm[i]);

115 }	91 }

116 write(fd, out_ipcm, sizeof(*out_ipcm) * samples);	92 write(fd, out_ipcm, sizeof(*out_ipcm) * samples);

117 }	93 }

118	94

119 // void function for gtest	95 int main(int argc, char* argv[]) {

120 void void_main(int argc, char* argv[]) {	96 google::SetUsageMessage("\n\nVariance algorithm types are:\n"

121 google::SetUsageMessage(	97 " 0 - infinite/normal,\n"

122 "\n\nVariance algorithm types are:\n"	98 " 1 - exponentially decaying,\n"

123 " 0 - infinite/normal,\n"	99 " 2 - rolling window.\n"

124 " 1 - exponentially decaying,\n"	100 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

125 " 2 - rolling window.\n"

126 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

127 google::ParseCommandLineFlags(&argc, &argv, true);	101 google::ParseCommandLineFlags(&argc, &argv, true);

128	102

129 const char* in_name = FLAGS_clear_file.c_str();	103 const char* in_name = FLAGS_clear_file.c_str();

130 const char* out_name = FLAGS_out_file.c_str();	104 const char* out_name = FLAGS_out_file.c_str();

131 const char* noise_name = FLAGS_noise_file.c_str();	105 const char* noise_name = FLAGS_noise_file.c_str();

132 struct stat in_stat, noise_stat;	106 struct stat in_stat, noise_stat;

133 int in_fd, out_fd, noise_fd;	107 int in_fd, out_fd, noise_fd;

134 FILE* aplay_file = nullptr;	108 FILE* aplay_file = nullptr;

135	109

136 // Load settings and set up PCMs.	110 fragment_size = FLAGS_sample_rate / 100;

137	111

138 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.	112 stat(in_name, &in_stat);

139 // Duplicates chunk_length_ in	113 stat(noise_name, &noise_stat);

140 // IntelligibilityEnhancer.

141

142 ASSERT_EQ(stat(in_name, &in_stat), 0) << "Empty speech input.";

143 ASSERT_EQ(stat(noise_name, &noise_stat), 0) << "Empty noise input.";

144

145 samples = in_stat.st_size / sizeof(*in_ipcm);	114 samples = in_stat.st_size / sizeof(*in_ipcm);

146	115

147 in_fd = open(in_name, O_RDONLY);	116 in_fd = open(in_name, O_RDONLY);

148 if (!strcmp(out_name, "-")) {	117 if (!strcmp(out_name, "-")) {

149 aplay_file = popen("aplay -t au", "w");	118 aplay_file = popen("aplay -t au", "w");

150 out_fd = fileno(aplay_file);	119 out_fd = fileno(aplay_file);

151 } else {	120 } else {

152 out_fd = open(out_name, O_WRONLY | O_CREAT | O_TRUNC,	121 out_fd = open(out_name, O_WRONLY | O_CREAT | O_TRUNC,

153 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);	122 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);

154 }	123 }

155 noise_fd = open(noise_name, O_RDONLY);	124 noise_fd = open(noise_name, O_RDONLY);

156	125

157 in_ipcm = static_cast<int16_t*>(	126 in_ipcm = static_cast<int16_t*>(mmap(nullptr, in_stat.st_size, PROT_READ,

158 mmap(nullptr, in_stat.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0));	127 MAP_PRIVATE, in_fd, 0));

159 noise_ipcm = static_cast<int16_t*>(	128 noise_ipcm = static_cast<int16_t*>(mmap(nullptr, noise_stat.st_size,

160 mmap(nullptr, noise_stat.st_size, PROT_READ, MAP_PRIVATE, noise_fd, 0));	129 PROT_READ, MAP_PRIVATE, noise_fd, 0));

161 out_ipcm = new int16_t[samples];	130 out_ipcm = new int16_t[samples];

162 out_fpcm = new float[samples];	131 out_fpcm = new float[samples];

163 in_fpcm = new float[samples];	132 in_fpcm = new float[samples];

164 noise_fpcm = new float[samples];	133 noise_fpcm = new float[samples];

165	134

166 for (int i = 0; i < samples; ++i) {	135 for (int i = 0; i < samples; ++i) {

167 noise_fpcm[i] = noise_ipcm[i % (noise_stat.st_size / sizeof(*noise_ipcm))];	136 noise_fpcm[i] = noise_ipcm[i % (noise_stat.st_size / sizeof(*noise_ipcm))];

168 }	137 }

169	138

170 // Run intelligibility enhancement.	139 //feenableexcept(FE_INVALID | FE_OVERFLOW);

171	140 IntelligibilityEnhancer enh(2,

172 IntelligibilityEnhancer enh(	141 FLAGS_sample_rate, 1,

173 kErbResolution,	142 FLAGS_clear_type,

174 FLAGS_sample_rate,	143 static_cast<float>(FLAGS_clear_alpha),

175 kNumChannels,	144 FLAGS_clear_window,

176 FLAGS_clear_type, static_cast<float>(FLAGS_clear_alpha),	145 FLAGS_ana_rate,

177 FLAGS_clear_window, FLAGS_ana_rate, FLAGS_var_rate, FLAGS_gain_limit);	146 FLAGS_var_rate,

	147 FLAGS_gain_limit);

178	148

179 // Slice the input into smaller chunks, as the APM would do, and feed them	149 // Slice the input into smaller chunks, as the APM would do, and feed them

180 // through the enhancer. Repeat indefinitely if FLAGS_repeat is set.	150 // into the enhancer. Repeat indefinitely if FLAGS_repeat is set.

181 do {	151 do {

182 noise_cursor = noise_fpcm;	152 noise_cursor = noise_fpcm;

183 clear_cursor = in_fpcm;	153 clear_cursor = in_fpcm;

184 for (int i = 0; i < samples; ++i) {	154 for (int i = 0; i < samples; ++i) {

185 in_fpcm[i] = in_ipcm[i];	155 in_fpcm[i] = in_ipcm[i];

186 }	156 }

187	157

188 for (int i = 0; i < samples; i += fragment_size) {	158 for (int i = 0; i < samples; i += fragment_size) {

189 enh.ProcessCaptureAudio(&noise_cursor);	159 enh.ProcessCaptureAudio(&noise_cursor);

190 enh.ProcessRenderAudio(&clear_cursor);	160 enh.ProcessRenderAudio(&clear_cursor);

(...skipping 13 matching lines...) Expand all Loading...
204	174

205 munmap(const_cast<int16_t*>(noise_ipcm), noise_stat.st_size);	175 munmap(const_cast<int16_t*>(noise_ipcm), noise_stat.st_size);

206 munmap(const_cast<int16_t*>(in_ipcm), in_stat.st_size);	176 munmap(const_cast<int16_t*>(in_ipcm), in_stat.st_size);

207 close(noise_fd);	177 close(noise_fd);

208 if (aplay_file) {	178 if (aplay_file) {

209 pclose(aplay_file);	179 pclose(aplay_file);

210 } else {	180 } else {

211 close(out_fd);	181 close(out_fd);

212 }	182 }

213 close(in_fd);	183 close(in_fd);

	184

	185 return 0;

214 }	186 }

215	187

216 } // namespace webrtc

217

218 int main(int argc, char* argv[]) {

219 webrtc::void_main(argc, argv);

220 return 0;

221 }

OLD	NEW