| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 12 matching lines...) |
| 23 #include "webrtc/common_audio/wav_file.h" | 23 #include "webrtc/common_audio/wav_file.h" |
| 24 #include "webrtc/modules/audio_processing/audio_buffer.h" | 24 #include "webrtc/modules/audio_processing/audio_buffer.h" |
| 25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" | 25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" |
| 26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" | 26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h" |
| 27 | 27 |
| 28 using std::complex; | 28 using std::complex; |
| 29 | 29 |
| 30 namespace webrtc { | 30 namespace webrtc { |
| 31 namespace { | 31 namespace { |
| 32 | 32 |
| 33 DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data."); | |
| 34 DEFINE_int32(sample_rate, | |
| 35 16000, | |
| 36 "Audio sample rate used in the input and output files."); | |
| 37 DEFINE_int32(ana_rate, | |
| 38 60, | |
| 39 "Analysis rate; gains recalculated every N blocks."); | |
| 40 DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block."); | |
| 41 | |
| 42 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); | 33 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); |
| 43 DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); | 34 DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); |
| 44 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); | 35 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); |
| 45 | 36 |
| 46 const size_t kNumChannels = 1; | |
| 47 | |
| 48 // void function for gtest | 37 // void function for gtest |
| 49 void void_main(int argc, char* argv[]) { | 38 void void_main(int argc, char* argv[]) { |
| 50 google::SetUsageMessage( | 39 google::SetUsageMessage( |
| 51 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); | 40 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); |
| 52 google::ParseCommandLineFlags(&argc, &argv, true); | 41 google::ParseCommandLineFlags(&argc, &argv, true); |
| 53 | 42 |
| 54 size_t samples; // Number of samples in input PCM file | |
| 55 size_t fragment_size; // Number of samples to process at a time | |
| 56 // to simulate APM stream processing | |
| 57 | |
| 58 // Load settings and wav input. | 43 // Load settings and wav input. |
| 59 | |
| 60 fragment_size = FLAGS_sample_rate / 100; // Mirror real time APM chunk size. | |
| 61 // Duplicates chunk_length_ in | |
| 62 // IntelligibilityEnhancer. | |
| 63 | |
| 64 struct stat in_stat, noise_stat; | 44 struct stat in_stat, noise_stat; |
| 65 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0) | 45 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0) |
| 66 << "Empty speech file."; | 46 << "Empty speech file."; |
| 67 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0) | 47 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0) |
| 68 << "Empty noise file."; | 48 << "Empty noise file."; |
| 69 | 49 |
| 70 samples = std::min(in_stat.st_size, noise_stat.st_size) / 2; | 50 const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2; |
| 71 | 51 |
| 72 WavReader in_file(FLAGS_clear_file); | 52 WavReader in_file(FLAGS_clear_file); |
| 73 std::vector<float> in_fpcm(samples); | 53 std::vector<float> in_fpcm(samples); |
| 74 in_file.ReadSamples(samples, &in_fpcm[0]); | 54 in_file.ReadSamples(samples, &in_fpcm[0]); |
| 75 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]); | 55 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]); |
| 76 | 56 |
| 77 WavReader noise_file(FLAGS_noise_file); | 57 WavReader noise_file(FLAGS_noise_file); |
| 78 std::vector<float> noise_fpcm(samples); | 58 std::vector<float> noise_fpcm(samples); |
| 79 noise_file.ReadSamples(samples, &noise_fpcm[0]); | 59 noise_file.ReadSamples(samples, &noise_fpcm[0]); |
| 80 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]); | 60 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]); |
| 81 | 61 |
| 82 // Run intelligibility enhancement. | 62 // Run intelligibility enhancement. |
| 83 IntelligibilityEnhancer::Config config; | 63 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels()); |
| 84 config.sample_rate_hz = FLAGS_sample_rate; | |
| 85 config.decay_rate = static_cast<float>(FLAGS_clear_alpha); | |
| 86 config.analysis_rate = FLAGS_ana_rate; | |
| 87 config.gain_change_limit = FLAGS_gain_limit; | |
| 88 IntelligibilityEnhancer enh(config); | |
| 89 rtc::CriticalSection crit; | 64 rtc::CriticalSection crit; |
| 90 NoiseSuppressionImpl ns(&crit); | 65 NoiseSuppressionImpl ns(&crit); |
| 91 ns.Initialize(kNumChannels, FLAGS_sample_rate); | 66 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate()); |
| 92 ns.Enable(true); | 67 ns.Enable(true); |
| 93 | 68 |
| 94 AudioBuffer capture_audio(fragment_size, | 69 // Mirror real time APM chunk size. Duplicates chunk_length_ in |
| 95 kNumChannels, | 70 // IntelligibilityEnhancer. |
| 96 fragment_size, | 71 size_t fragment_size = in_file.sample_rate() / 100; |
| 97 kNumChannels, | 72 AudioBuffer capture_audio(fragment_size, noise_file.num_channels(), |
| 73 fragment_size, noise_file.num_channels(), |
| 98 fragment_size); | 74 fragment_size); |
| 99 StreamConfig stream_config(FLAGS_sample_rate, kNumChannels); | 75 StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels()); |
| 100 | 76 |
| 101 // Slice the input into smaller chunks, as the APM would do, and feed them | 77 // Slice the input into smaller chunks, as the APM would do, and feed them |
| 102 // through the enhancer. | 78 // through the enhancer. |
| 103 float* clear_cursor = &in_fpcm[0]; | 79 float* clear_cursor = &in_fpcm[0]; |
| 104 float* noise_cursor = &noise_fpcm[0]; | 80 float* noise_cursor = &noise_fpcm[0]; |
| 105 | 81 |
| 106 for (size_t i = 0; i < samples; i += fragment_size) { | 82 for (size_t i = 0; i < samples; i += fragment_size) { |
| 107 capture_audio.CopyFrom(&noise_cursor, stream_config); | 83 capture_audio.CopyFrom(&noise_cursor, stream_config); |
| 108 ns.AnalyzeCaptureAudio(&capture_audio); | 84 ns.AnalyzeCaptureAudio(&capture_audio); |
| 109 ns.ProcessCaptureAudio(&capture_audio); | 85 ns.ProcessCaptureAudio(&capture_audio); |
| 110 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate()); | 86 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate()); |
| 111 enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels); | 87 enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(), |
| 88 in_file.num_channels()); |
| 112 clear_cursor += fragment_size; | 89 clear_cursor += fragment_size; |
| 113 noise_cursor += fragment_size; | 90 noise_cursor += fragment_size; |
| 114 } | 91 } |
| 115 | 92 |
| 116 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]); | 93 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]); |
| 117 | 94 |
| 118 WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels); | 95 WavWriter out_file(FLAGS_out_file, |
| 96 in_file.sample_rate(), |
| 97 in_file.num_channels()); |
| 119 out_file.WriteSamples(&in_fpcm[0], samples); | 98 out_file.WriteSamples(&in_fpcm[0], samples); |
| 120 } | 99 } |
| 121 | 100 |
| 122 } // namespace | 101 } // namespace |
| 123 } // namespace webrtc | 102 } // namespace webrtc |
| 124 | 103 |
| 125 int main(int argc, char* argv[]) { | 104 int main(int argc, char* argv[]) { |
| 126 webrtc::void_main(argc, argv); | 105 webrtc::void_main(argc, argv); |
| 127 return 0; | 106 return 0; |
| 128 } | 107 } |
| OLD | NEW |