OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 12 matching lines...) Expand all Loading... |
23 #include "webrtc/common_audio/wav_file.h" | 23 #include "webrtc/common_audio/wav_file.h" |
24 #include "webrtc/modules/audio_processing/audio_buffer.h" | 24 #include "webrtc/modules/audio_processing/audio_buffer.h" |
25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" | 25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" |
26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | 26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" |
27 | 27 |
28 using std::complex; | 28 using std::complex; |
29 | 29 |
30 namespace webrtc { | 30 namespace webrtc { |
31 namespace { | 31 namespace { |
32 | 32 |
33 DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data."); | |
34 DEFINE_int32(sample_rate, | |
35 16000, | |
36 "Audio sample rate used in the input and output files."); | |
37 DEFINE_int32(ana_rate, | |
38 60, | |
39 "Analysis rate; gains recalculated every N blocks."); | |
40 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block."); | |
41 | |
42 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); | 33 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); |
43 DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); | 34 DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); |
44 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); | 35 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); |
45 | 36 |
46 const size_t kNumChannels = 1; | |
47 | |
48 // void function for gtest | 37 // void function for gtest |
49 void void_main(int argc, char* argv[]) { | 38 void void_main(int argc, char* argv[]) { |
50 google::SetUsageMessage( | 39 google::SetUsageMessage( |
51 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); | 40 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); |
52 google::ParseCommandLineFlags(&argc, &argv, true); | 41 google::ParseCommandLineFlags(&argc, &argv, true); |
53 | 42 |
54 size_t samples; // Number of samples in input PCM file | |
55 size_t fragment_size; // Number of samples to process at a time | |
56 // to simulate APM stream processing | |
57 | |
58 // Load settings and wav input. | 43 // Load settings and wav input. |
59 | |
60 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size. | |
61 // Duplicates chunk_length_ in | |
62 // IntelligibilityEnhancer. | |
63 | |
64 struct stat in_stat, noise_stat; | 44 struct stat in_stat, noise_stat; |
65 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0) | 45 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0) |
66 << "Empty speech file."; | 46 << "Empty speech file."; |
67 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0) | 47 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0) |
68 << "Empty noise file."; | 48 << "Empty noise file."; |
69 | 49 |
70 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2; | 50 const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2; |
71 | 51 |
72 WavReader in_file(FLAGS_clear_file); | 52 WavReader in_file(FLAGS_clear_file); |
73 std::vector<float> in_fpcm(samples); | 53 std::vector<float> in_fpcm(samples); |
74 in_file.ReadSamples(samples, &in_fpcm[0]); | 54 in_file.ReadSamples(samples, &in_fpcm[0]); |
75 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]); | 55 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]); |
76 | 56 |
77 WavReader noise_file(FLAGS_noise_file); | 57 WavReader noise_file(FLAGS_noise_file); |
78 std::vector<float> noise_fpcm(samples); | 58 std::vector<float> noise_fpcm(samples); |
79 noise_file.ReadSamples(samples, &noise_fpcm[0]); | 59 noise_file.ReadSamples(samples, &noise_fpcm[0]); |
80 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]); | 60 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]); |
81 | 61 |
82 // Run intelligibility enhancement. | 62 // Run intelligibility enhancement. |
83 IntelligibilityEnhancer::Config config; | 63 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels()); |
84 config.sample_rate_hz = FLAGS_sample_rate; | |
85 config.decay_rate = static_cast<float>(FLAGS_clear_alpha); | |
86 config.analysis_rate = FLAGS_ana_rate; | |
87 config.gain_change_limit = FLAGS_gain_limit; | |
88 IntelligibilityEnhancer enh(config); | |
89 rtc::CriticalSection crit; | 64 rtc::CriticalSection crit; |
90 NoiseSuppressionImpl ns(&crit); | 65 NoiseSuppressionImpl ns(&crit); |
91 ns.Initialize(kNumChannels, FLAGS_sample_rate); | 66 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate()); |
92 ns.Enable(true); | 67 ns.Enable(true); |
93 | 68 |
94 AudioBuffer capture_audio(fragment_size, | 69 // Mirror real time APM chunk size. Duplicates chunk_length_ in |
95 kNumChannels, | 70 // IntelligibilityEnhancer. |
96 fragment_size, | 71 size_t fragment_size = in_file.sample_rate() / 100; |
97 kNumChannels, | 72 AudioBuffer capture_audio(fragment_size, noise_file.num_channels(), |
| 73 fragment_size, noise_file.num_channels(), |
98 fragment_size); | 74 fragment_size); |
99 StreamConfig stream_config(FLAGS_sample_rate, kNumChannels); | 75 StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels()); |
100 | 76 |
101 // Slice the input into smaller chunks, as the APM would do, and feed them | 77 // Slice the input into smaller chunks, as the APM would do, and feed them |
102 // through the enhancer. | 78 // through the enhancer. |
103 float* clear_cursor = &in_fpcm[0]; | 79 float* clear_cursor = &in_fpcm[0]; |
104 float* noise_cursor = &noise_fpcm[0]; | 80 float* noise_cursor = &noise_fpcm[0]; |
105 | 81 |
106 for (size_t i = 0; i < samples; i += fragment_size) { | 82 for (size_t i = 0; i < samples; i += fragment_size) { |
107 capture_audio.CopyFrom(&noise_cursor, stream_config); | 83 capture_audio.CopyFrom(&noise_cursor, stream_config); |
108 ns.AnalyzeCaptureAudio(&capture_audio); | 84 ns.AnalyzeCaptureAudio(&capture_audio); |
109 ns.ProcessCaptureAudio(&capture_audio); | 85 ns.ProcessCaptureAudio(&capture_audio); |
110 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate()); | 86 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate()); |
111 enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels); | 87 enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(), |
| 88 in_file.num_channels()); |
112 clear_cursor += fragment_size; | 89 clear_cursor += fragment_size; |
113 noise_cursor += fragment_size; | 90 noise_cursor += fragment_size; |
114 } | 91 } |
115 | 92 |
116 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]); | 93 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]); |
117 | 94 |
118 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels); | 95 WavWriter out_file(FLAGS_out_file, |
| 96 in_file.sample_rate(), |
| 97 in_file.num_channels()); |
119 out_file.WriteSamples(&in_fpcm[0], samples); | 98 out_file.WriteSamples(&in_fpcm[0], samples); |
120 } | 99 } |
121 | 100 |
122 } // namespace | 101 } // namespace |
123 } // namespace webrtc | 102 } // namespace webrtc |
124 | 103 |
125 int main(int argc, char* argv[]) { | 104 int main(int argc, char* argv[]) { |
126 webrtc::void_main(argc, argv); | 105 webrtc::void_main(argc, argv); |
127 return 0; | 106 return 0; |
128 } | 107 } |
OLD | NEW |