webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc - Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc

Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ie

Patch Set: Address turajs comments Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc ('k') | webrtc/modules/audio_processing/noise_suppression_impl.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //	11 //

12 // Command line tool for speech intelligibility enhancement. Provides for	12 // Command line tool for speech intelligibility enhancement. Provides for

13 // running and testing intelligibility_enhancer as an independent process.	13 // running and testing intelligibility_enhancer as an independent process.

14 // Use --help for options.	14 // Use --help for options.

15 //	15 //

16	16

17 #include <stdint.h>	17 #include <stdint.h>

18 #include <stdlib.h>	18 #include <stdlib.h>

19 #include <sys/stat.h>	19 #include <sys/stat.h>

20 #include <sys/types.h>	20 #include <sys/types.h>

21 #include <string>	21 #include <string>

22	22

23 #include "gflags/gflags.h"	23 #include "gflags/gflags.h"

24 #include "testing/gtest/include/gtest/gtest.h"	24 #include "testing/gtest/include/gtest/gtest.h"

25 #include "webrtc/base/checks.h"	25 #include "webrtc/base/checks.h"

26 #include "webrtc/base/criticalsection.h"	26 #include "webrtc/base/criticalsection.h"

	27 #include "webrtc/common_audio/include/audio_util.h"

27 #include "webrtc/common_audio/real_fourier.h"	28 #include "webrtc/common_audio/real_fourier.h"

28 #include "webrtc/common_audio/wav_file.h"	29 #include "webrtc/common_audio/wav_file.h"

29 #include "webrtc/modules/audio_processing/audio_buffer.h"	30 #include "webrtc/modules/audio_processing/audio_buffer.h"

30 #include "webrtc/modules/audio_processing/include/audio_processing.h"	31 #include "webrtc/modules/audio_processing/include/audio_processing.h"

31 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	32 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

32 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	33 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

33 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"	34 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"

34 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"	35 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"

35 #include "webrtc/test/testsupport/fileutils.h"	36 #include "webrtc/test/testsupport/fileutils.h"

36	37

37 using std::complex;	38 using std::complex;

38 using webrtc::intelligibility::VarianceArray;

39	39

40 namespace webrtc {	40 namespace webrtc {

41 namespace {	41 namespace {

42	42

43 bool ValidateClearWindow(const char* flagname, int32_t value) {	43 DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data.");

44 return value > 0;

45 }

46

47 DEFINE_int32(clear_type,

48 webrtc::intelligibility::VarianceArray::kStepDecaying,

49 "Variance algorithm for clear data.");

50 DEFINE_double(clear_alpha, 0.9, "Variance decay factor for clear data.");

51 DEFINE_int32(clear_window,

52 475,

53 "Window size for windowed variance for clear data.");

54 const bool clear_window_dummy =

55 google::RegisterFlagValidator(&FLAGS_clear_window, &ValidateClearWindow);

56 DEFINE_int32(sample_rate,	44 DEFINE_int32(sample_rate,

57 16000,	45 16000,

58 "Audio sample rate used in the input and output files.");	46 "Audio sample rate used in the input and output files.");

59 DEFINE_int32(ana_rate,	47 DEFINE_int32(ana_rate,

60 800,	48 60,

61 "Analysis rate; gains recalculated every N blocks.");	49 "Analysis rate; gains recalculated every N blocks.");

62 DEFINE_int32(

63 var_rate,

64 2,

65 "Variance clear rate; history is forgotten every N gain recalculations.");

66 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");	50 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");

67	51

68 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");	52 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");

69 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");	53 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");

70 DEFINE_string(out_file,	54 DEFINE_string(out_file,

71 "proc_enhanced.wav",	55 "proc_enhanced.wav",

72 "Enhanced output. Use '-' to "	56 "Enhanced output. Use '-' to "

73 "play through aplay immediately.");	57 "play through aplay immediately.");

74	58

75 const size_t kNumChannels = 1;	59 const size_t kNumChannels = 1;

76	60

77 // void function for gtest	61 // void function for gtest

78 void void_main(int argc, char* argv[]) {	62 void void_main(int argc, char* argv[]) {

79 google::SetUsageMessage(	63 google::SetUsageMessage(

80 "\n\nVariance algorithm types are:\n"	64 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");

81 " 0 - infinite/normal,\n"

82 " 1 - exponentially decaying,\n"

83 " 2 - rolling window.\n"

84 "\nInput files must be little-endian 16-bit signed raw PCM.\n");

85 google::ParseCommandLineFlags(&argc, &argv, true);	65 google::ParseCommandLineFlags(&argc, &argv, true);

86	66

87 size_t samples; // Number of samples in input PCM file	67 size_t samples; // Number of samples in input PCM file

88 size_t fragment_size; // Number of samples to process at a time	68 size_t fragment_size; // Number of samples to process at a time

89 // to simulate APM stream processing	69 // to simulate APM stream processing

90	70

91 // Load settings and wav input.	71 // Load settings and wav input.

92	72

93 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.	73 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size.

94 // Duplicates chunk_length_ in	74 // Duplicates chunk_length_ in

95 // IntelligibilityEnhancer.	75 // IntelligibilityEnhancer.

96	76

97 struct stat in_stat, noise_stat;	77 struct stat in_stat, noise_stat;

98 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)	78 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)

99 << "Empty speech file.";	79 << "Empty speech file.";

100 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)	80 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)

101 << "Empty noise file.";	81 << "Empty noise file.";

102	82

103 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;	83 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;

104	84

105 WavReader in_file(FLAGS_clear_file);	85 WavReader in_file(FLAGS_clear_file);

106 std::vector<float> in_fpcm(samples);	86 std::vector<float> in_fpcm(samples);

107 in_file.ReadSamples(samples, &in_fpcm[0]);	87 in_file.ReadSamples(samples, &in_fpcm[0]);

	88 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]);

108	89

109 WavReader noise_file(FLAGS_noise_file);	90 WavReader noise_file(FLAGS_noise_file);

110 std::vector<float> noise_fpcm(samples);	91 std::vector<float> noise_fpcm(samples);

111 noise_file.ReadSamples(samples, &noise_fpcm[0]);	92 noise_file.ReadSamples(samples, &noise_fpcm[0]);

	93 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]);

112	94

113 // Run intelligibility enhancement.	95 // Run intelligibility enhancement.

114 IntelligibilityEnhancer::Config config;	96 IntelligibilityEnhancer::Config config;

115 config.sample_rate_hz = FLAGS_sample_rate;	97 config.sample_rate_hz = FLAGS_sample_rate;

116 config.var_type = static_cast<VarianceArray::StepType>(FLAGS_clear_type);	98 config.decay_rate = static_cast<float>(FLAGS_clear_alpha);

117 config.var_decay_rate = static_cast<float>(FLAGS_clear_alpha);

118 config.var_window_size = static_cast<size_t>(FLAGS_clear_window);

119 config.analysis_rate = FLAGS_ana_rate;	99 config.analysis_rate = FLAGS_ana_rate;

120 config.gain_change_limit = FLAGS_gain_limit;	100 config.gain_change_limit = FLAGS_gain_limit;

121 IntelligibilityEnhancer enh(config);	101 IntelligibilityEnhancer enh(config);

122 rtc::CriticalSection crit;	102 rtc::CriticalSection crit;

123 NoiseSuppressionImpl ns(&crit);	103 NoiseSuppressionImpl ns(&crit);

124 ns.Initialize(kNumChannels, FLAGS_sample_rate);	104 ns.Initialize(kNumChannels, FLAGS_sample_rate);

125 ns.Enable(true);	105 ns.Enable(true);

126	106

127 AudioBuffer capture_audio(fragment_size,	107 AudioBuffer capture_audio(fragment_size,

128 kNumChannels,	108 kNumChannels,

(...skipping 10 matching lines...) Expand all Loading...
139 for (size_t i = 0; i < samples; i += fragment_size) {	119 for (size_t i = 0; i < samples; i += fragment_size) {

140 capture_audio.CopyFrom(&noise_cursor, stream_config);	120 capture_audio.CopyFrom(&noise_cursor, stream_config);

141 ns.AnalyzeCaptureAudio(&capture_audio);	121 ns.AnalyzeCaptureAudio(&capture_audio);

142 ns.ProcessCaptureAudio(&capture_audio);	122 ns.ProcessCaptureAudio(&capture_audio);

143 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());	123 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());

144 enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels);	124 enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels);

145 clear_cursor += fragment_size;	125 clear_cursor += fragment_size;

146 noise_cursor += fragment_size;	126 noise_cursor += fragment_size;

147 }	127 }

148	128

	129 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]);

	130

149 if (FLAGS_out_file.compare("-") == 0) {	131 if (FLAGS_out_file.compare("-") == 0) {

150 const std::string temp_out_filename =	132 const std::string temp_out_filename =

151 test::TempFilename(test::WorkingDir(), "temp_wav_file");	133 test::TempFilename(test::WorkingDir(), "temp_wav_file");

152 {	134 {

153 WavWriter out_file(temp_out_filename, FLAGS_sample_rate, kNumChannels);	135 WavWriter out_file(temp_out_filename, FLAGS_sample_rate, kNumChannels);

154 out_file.WriteSamples(&in_fpcm[0], samples);	136 out_file.WriteSamples(&in_fpcm[0], samples);

155 }	137 }

156 system(("aplay " + temp_out_filename).c_str());	138 system(("aplay " + temp_out_filename).c_str());

157 system(("rm " + temp_out_filename).c_str());	139 system(("rm " + temp_out_filename).c_str());

158 } else {	140 } else {

159 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels);	141 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels);

160 out_file.WriteSamples(&in_fpcm[0], samples);	142 out_file.WriteSamples(&in_fpcm[0], samples);

161 }	143 }

162 }	144 }

163	145

164 } // namespace	146 } // namespace

165 } // namespace webrtc	147 } // namespace webrtc

166	148

167 int main(int argc, char* argv[]) {	149 int main(int argc, char* argv[]) {

168 webrtc::void_main(argc, argv);	150 webrtc::void_main(argc, argv);

169 return 0;	151 return 0;

170 }	152 }

OLD	NEW