webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc - Issue 1182323005: Allow intelligibility to compile in apm

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_proc.cc

Issue 1182323005: Allow intelligibility to compile in apm (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Addressed comments Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include <arpa/inet.h>	11 //

12 #include <fcntl.h>	12 // Command line tool for speech intelligibility enhancement. Provides for

	13 // running and testing intelligibility_enhancer as an independent process.

	14 // Use --help for options.

	15 //

	16

13 #include <stdint.h>	17 #include <stdint.h>

14 #include <stdio.h>

15 #include <stdlib.h>	18 #include <stdlib.h>

16 #include <sys/mman.h>	19 #include <string>

17 #include <sys/stat.h>	20 #include <sys/stat.h>

18 #include <sys/types.h>	21 #include <sys/types.h>

19 #include <unistd.h>

20

21 #include <fenv.h>

22 #include <limits>

23

24 #include <complex>

25	22

26 #include "gflags/gflags.h"	23 #include "gflags/gflags.h"

	24 #include "testing/gtest/include/gtest/gtest.h"

27 #include "webrtc/base/checks.h"	25 #include "webrtc/base/checks.h"

28 #include "webrtc/common_audio/real_fourier.h"	26 #include "webrtc/common_audio/real_fourier.h"

	27 #include "webrtc/common_audio/wav_file.h"

29 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	28 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

30 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	29 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

31 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"	30 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"

32 #include "webrtc/system_wrappers/interface/scoped_ptr.h"

33

34 const int16_t* in_ipcm;

35 int16_t* out_ipcm;

36 const int16_t* noise_ipcm;

37

38 float* in_fpcm;

39 float* out_fpcm;

40 float* noise_fpcm;

41 float* noise_cursor;

42 float* clear_cursor;

43

44 int samples;

45 int fragment_size;

46	31

47 using std::complex;	32 using std::complex;

	33

	34 namespace webrtc {

	35

48 using webrtc::RealFourier;	36 using webrtc::RealFourier;

49 using webrtc::IntelligibilityEnhancer;	37 using webrtc::IntelligibilityEnhancer;

50	38

51 DEFINE_int32(clear_type, webrtc::intelligibility::VarianceArray::kStepInfinite,	39 DEFINE_int32(clear_type,

	40 webrtc::intelligibility::VarianceArray::kStepInfinite,

52 "Variance algorithm for clear data.");	41 "Variance algorithm for clear data.");

53 DEFINE_double(clear_alpha, 0.9,	42 DEFINE_double(clear_alpha, 0.9, "Variance decay factor for clear data.");

54 "Variance decay factor for clear data.");	43 DEFINE_int32(clear_window,

55 DEFINE_int32(clear_window, 475,	44 475,

56 "Window size for windowed variance for clear data.");	45 "Window size for windowed variance for clear data.");

57 DEFINE_int32(sample_rate, 16000,	46 DEFINE_int32(sample_rate,

	47 16000,

58 "Audio sample rate used in the input and output files.");	48 "Audio sample rate used in the input and output files.");

59 DEFINE_int32(ana_rate, 800,	49 DEFINE_int32(ana_rate,

	50 800,

60 "Analysis rate; gains recalculated every N blocks.");	51 "Analysis rate; gains recalculated every N blocks.");

61 DEFINE_int32(var_rate, 2,	52 DEFINE_int32(

62 "Variance clear rate; history is forgotten every N gain recalculations.");	53 var_rate,

	54 2,

	55 "Variance clear rate; history is forgotten every N gain recalculations.");

63 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");	56 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");

64	57

65 DEFINE_bool(repeat, false, "Repeat input file ad nauseam.");	58 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");

	59 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");

	60 DEFINE_string(out_file,

	61 "proc_enhanced.wav",

	62 "Enhanced output. Use '-' to "

	63 "play through aplay immediately.");

66	64

67 DEFINE_string(clear_file, "speech.pcm", "Input file with clear speech.");	65 // Constant IntelligibilityEnhancer constructor parameters.

68 DEFINE_string(noise_file, "noise.pcm", "Input file with noise data.");	66 const int kErbResolution = 2;

69 DEFINE_string(out_file, "proc_enhanced.pcm", "Enhanced output. Use '-' to "	67 const int kNumChannels = 1;

70 "pipe through aplay internally.");

71	68

72 // Write an Sun AU-formatted audio chunk into file descriptor |fd|. Can be used	69 // void function for gtest

73 // to pipe the audio stream directly into aplay.	70 void void_main(int argc, char* argv[]) {

74 void writeau(int fd) {	71 google::SetUsageMessage(

75 uint32_t thing;	72 "\n\nVariance algorithm types are:\n"

	73 " 0 - infinite/normal,\n"

	74 " 1 - exponentially decaying,\n"

	75 " 2 - rolling window.\n"

	76 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

	77 google::ParseCommandLineFlags(&argc, &argv, true);

76	78

77 write(fd, ".snd", 4);	79 const std::string kTmpOutFile = "tmp_out_file.wav";

78 thing = htonl(24);

79 write(fd, &thing, sizeof(thing));

80 thing = htonl(0xffffffff);

81 write(fd, &thing, sizeof(thing));

82 thing = htonl(3);

83 write(fd, &thing, sizeof(thing));

84 thing = htonl(FLAGS_sample_rate);

85 write(fd, &thing, sizeof(thing));

86 thing = htonl(1);

87 write(fd, &thing, sizeof(thing));

88	80

89 for (int i = 0; i < samples; ++i) {	81 size_t samples; // Number of samples in input PCM file

90 out_ipcm[i] = htons(out_ipcm[i]);	82 size_t fragment_size; // Number of samples to process at a time

	83 // to simulate APM stream processing

	84

	85 // Load settings and wav input.

	86

	87 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.

	88 // Duplicates chunk_length_ in

	89 // IntelligibilityEnhancer.

	90

	91 struct stat in_stat, noise_stat;

	92 stat(FLAGS_clear_file.c_str(), &in_stat);

	93 stat(FLAGS_noise_file.c_str(), &noise_stat);

	94

	95 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;

	96

	97 WavReader in_file(FLAGS_clear_file);

	98 std::vector<float> in_fpcm(samples);

	99 in_file.ReadSamples(samples, &in_fpcm[0]);

	100

	101 WavReader noise_file(FLAGS_noise_file);

	102 std::vector<float> noise_fpcm(samples);

	103 noise_file.ReadSamples(samples, &noise_fpcm[0]);

	104

	105 // Run intelligibility enhancement.

	106

	107 IntelligibilityEnhancer enh(

	108 kErbResolution,

	109 FLAGS_sample_rate,

	110 kNumChannels,

	111 FLAGS_clear_type, static_cast<float>(FLAGS_clear_alpha),

	112 FLAGS_clear_window, FLAGS_ana_rate, FLAGS_var_rate, FLAGS_gain_limit);

	113

	114 // Slice the input into smaller chunks, as the APM would do, and feed them

	115 // through the enhancer.

	116 float* clear_cursor = &in_fpcm[0];

	117 float* noise_cursor = &noise_fpcm[0];

	118

	119 for (size_t i = 0; i < samples; i += fragment_size) {

	120 enh.ProcessCaptureAudio(&noise_cursor);

	121 enh.ProcessRenderAudio(&clear_cursor);

	122 clear_cursor += fragment_size;

	123 noise_cursor += fragment_size;

91 }	124 }

92 write(fd, out_ipcm, sizeof(*out_ipcm) * samples);	125

	126

	127 if (FLAGS_out_file.compare("-") == 0) {

	128 {

	129 WavWriter out_file(kTmpOutFile, FLAGS_sample_rate, kNumChannels);

	130 out_file.WriteSamples(&in_fpcm[0], samples);

	131 }

	132 system(("aplay " + kTmpOutFile).c_str());

	133 system(("rm " + kTmpOutFile).c_str());

	134 } else {

	135 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels);

	136 out_file.WriteSamples(&in_fpcm[0], samples);

	137 }

	138

93 }	139 }

94	140

	141 } // namespace webrtc

	142

95 int main(int argc, char* argv[]) {	143 int main(int argc, char* argv[]) {

96 google::SetUsageMessage("\n\nVariance algorithm types are:\n"	144 webrtc::void_main(argc, argv);

97 " 0 - infinite/normal,\n"

98 " 1 - exponentially decaying,\n"

99 " 2 - rolling window.\n"

100 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

101 google::ParseCommandLineFlags(&argc, &argv, true);

102

103 const char* in_name = FLAGS_clear_file.c_str();

104 const char* out_name = FLAGS_out_file.c_str();

105 const char* noise_name = FLAGS_noise_file.c_str();

106 struct stat in_stat, noise_stat;

107 int in_fd, out_fd, noise_fd;

108 FILE* aplay_file = nullptr;

109

110 fragment_size = FLAGS_sample_rate / 100;

111

112 stat(in_name, &in_stat);

113 stat(noise_name, &noise_stat);

114 samples = in_stat.st_size / sizeof(*in_ipcm);

115

116 in_fd = open(in_name, O_RDONLY);

117 if (!strcmp(out_name, "-")) {

118 aplay_file = popen("aplay -t au", "w");

119 out_fd = fileno(aplay_file);

120 } else {

121 out_fd = open(out_name, O_WRONLY | O_CREAT | O_TRUNC,

122 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);

123 }

124 noise_fd = open(noise_name, O_RDONLY);

125

126 in_ipcm = static_cast<int16_t*>(mmap(nullptr, in_stat.st_size, PROT_READ,

127 MAP_PRIVATE, in_fd, 0));

128 noise_ipcm = static_cast<int16_t*>(mmap(nullptr, noise_stat.st_size,

129 PROT_READ, MAP_PRIVATE, noise_fd, 0));

130 out_ipcm = new int16_t[samples];

131 out_fpcm = new float[samples];

132 in_fpcm = new float[samples];

133 noise_fpcm = new float[samples];

134

135 for (int i = 0; i < samples; ++i) {

136 noise_fpcm[i] = noise_ipcm[i % (noise_stat.st_size / sizeof(*noise_ipcm))];

137 }

138

139 //feenableexcept(FE_INVALID | FE_OVERFLOW);

140 IntelligibilityEnhancer enh(2,

141 FLAGS_sample_rate, 1,

142 FLAGS_clear_type,

143 static_cast<float>(FLAGS_clear_alpha),

144 FLAGS_clear_window,

145 FLAGS_ana_rate,

146 FLAGS_var_rate,

147 FLAGS_gain_limit);

148

149 // Slice the input into smaller chunks, as the APM would do, and feed them

150 // into the enhancer. Repeat indefinitely if FLAGS_repeat is set.

151 do {

152 noise_cursor = noise_fpcm;

153 clear_cursor = in_fpcm;

154 for (int i = 0; i < samples; ++i) {

155 in_fpcm[i] = in_ipcm[i];

156 }

157

158 for (int i = 0; i < samples; i += fragment_size) {

159 enh.ProcessCaptureAudio(&noise_cursor);

160 enh.ProcessRenderAudio(&clear_cursor);

161 clear_cursor += fragment_size;

162 noise_cursor += fragment_size;

163 }

164

165 for (int i = 0; i < samples; ++i) {

166 out_ipcm[i] = static_cast<float>(in_fpcm[i]);

167 }

168 if (!strcmp(out_name, "-")) {

169 writeau(out_fd);

170 } else {

171 write(out_fd, out_ipcm, samples * sizeof(*out_ipcm));

172 }

173 } while (FLAGS_repeat);

174

175 munmap(const_cast<int16_t*>(noise_ipcm), noise_stat.st_size);

176 munmap(const_cast<int16_t*>(in_ipcm), in_stat.st_size);

177 close(noise_fd);

178 if (aplay_file) {

179 pclose(aplay_file);

180 } else {

181 close(out_fd);

182 }

183 close(in_fd);

184

185 return 0;	145 return 0;

186 }	146 }

187

OLD	NEW