webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc - Issue 1729753003: Fix the stereo support in IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc

Issue 1729753003: Fix the stereo support in IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@gains2

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Command line tool for speech intelligibility enhancement. Provides for

13 // running and testing intelligibility_enhancer as an independent process.

14 // Use --help for options.

15 //

16

17 #include <sys/stat.h>

18

19 #include "gflags/gflags.h"	11 #include "gflags/gflags.h"

20 #include "testing/gtest/include/gtest/gtest.h"	12 #include "testing/gtest/include/gtest/gtest.h"

21 #include "webrtc/base/criticalsection.h"	13 #include "webrtc/base/criticalsection.h"

	14 #include "webrtc/common_audio/channel_buffer.h"

22 #include "webrtc/common_audio/include/audio_util.h"	15 #include "webrtc/common_audio/include/audio_util.h"

23 #include "webrtc/common_audio/wav_file.h"	16 #include "webrtc/common_audio/wav_file.h"

24 #include "webrtc/modules/audio_processing/audio_buffer.h"	17 #include "webrtc/modules/audio_processing/audio_buffer.h"

25 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	18 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

26 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"	19 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"

27	20

28 using std::complex;	21 using std::complex;

29	22

30 namespace webrtc {	23 namespace webrtc {

31 namespace {	24 namespace {

32	25

33 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");	26 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");

34 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");	27 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");

35 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");	28 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");

36	29

37 // void function for gtest	30 // void function for gtest

38 void void_main(int argc, char* argv[]) {	31 void void_main(int argc, char* argv[]) {

39 google::SetUsageMessage(	32 google::SetUsageMessage(

40 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");	33 "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");

41 google::ParseCommandLineFlags(&argc, &argv, true);	34 google::ParseCommandLineFlags(&argc, &argv, true);

42	35

43 // Load settings and wav input.

44 struct stat in_stat, noise_stat;

45 ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)

46 << "Empty speech file.";

47 ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)

48 << "Empty noise file.";

49

50 const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;

51

52 WavReader in_file(FLAGS_clear_file);	36 WavReader in_file(FLAGS_clear_file);

53 std::vector<float> in_fpcm(samples);

54 in_file.ReadSamples(samples, &in_fpcm[0]);

55 FloatS16ToFloat(&in_fpcm[0], samples, &in_fpcm[0]);

56

57 WavReader noise_file(FLAGS_noise_file);	37 WavReader noise_file(FLAGS_noise_file);

58 std::vector<float> noise_fpcm(samples);	38 WavWriter out_file(FLAGS_out_file,

59 noise_file.ReadSamples(samples, &noise_fpcm[0]);	39 in_file.sample_rate(),

60 FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]);	40 in_file.num_channels());

61

62 // Run intelligibility enhancement.

63 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());	41 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());

64 rtc::CriticalSection crit;	42 rtc::CriticalSection crit;

65 NoiseSuppressionImpl ns(&crit);	43 NoiseSuppressionImpl ns(&crit);

66 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());	44 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());

67 ns.Enable(true);	45 ns.Enable(true);

68	46 const size_t in_samples = noise_file.sample_rate() / 100;

69 // Mirror real time APM chunk size. Duplicates chunk_length_ in	47 const size_t noise_samples = noise_file.sample_rate() / 100;
	hlundin-webrtc 2016/02/24 10:17:03: What if the sample rates differ? Is that allowed and handled? aluebs-webrtc 2016/02/25 00:18:37: Yes it is, since the sample rate of the noise only matters to the NS and the one of the input only matters to the IE. hlundin-webrtc 2016/02/26 21:58:57: Acknowledged.
70 // IntelligibilityEnhancer.	48 std::vector<float> in(in_samples * in_file.num_channels());

71 size_t fragment_size = in_file.sample_rate() / 100;	49 std::vector<float> noise(noise_samples * noise_file.num_channels());

72 AudioBuffer capture_audio(fragment_size, noise_file.num_channels(),	50 ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());

73 fragment_size, noise_file.num_channels(),	51 ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());

74 fragment_size);	52 AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),

75 StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels());	53 noise_samples, noise_file.num_channels(),

76	54 noise_samples);

77 // Slice the input into smaller chunks, as the APM would do, and feed them	55 StreamConfig stream_config(noise_file.sample_rate(),

78 // through the enhancer.	56 noise_file.num_channels());

79 float* clear_cursor = &in_fpcm[0];	57 while (in_file.ReadSamples(in.size(), &in[0]) == in.size() &&
	hlundin-webrtc 2016/02/24 10:17:03: in.data() (several places below). aluebs-webrtc 2016/02/25 00:18:37: Done.
80 float* noise_cursor = &noise_fpcm[0];	58 noise_file.ReadSamples(noise.size(), &noise[0]) == noise.size()) {
	hlundin-webrtc 2016/02/24 10:17:03: noise.data() (several places below). aluebs-webrtc 2016/02/25 00:18:37: Done.
81	59 FloatS16ToFloat(&in[0], in.size(), &in[0]);

82 for (size_t i = 0; i < samples; i += fragment_size) {	60 FloatS16ToFloat(&noise[0], noise.size(), &noise[0]);
	turaj 2016/02/24 16:00:17: NOT RELATED TO THIS CL -- Is it guaranteed that in practice near end and far end signals to APM are in the same range? I guess no other module requires this. aluebs-webrtc 2016/02/25 00:18:37: That is a good question. Testing with audioproc they are and I want to hope this holds for all platforms. It should, since in an aecdump the render and capture signals are in the same range, right?
83 capture_audio.CopyFrom(&noise_cursor, stream_config);	61 Deinterleave(&in[0],

	62 in_buf.num_frames(),

	63 in_buf.num_channels(),

	64 in_buf.channels());

	65 Deinterleave(&noise[0],

	66 noise_buf.num_frames(),

	67 noise_buf.num_channels(),

	68 noise_buf.channels());

	69 capture_audio.CopyFrom(noise_buf.channels(), stream_config);

84 ns.AnalyzeCaptureAudio(&capture_audio);	70 ns.AnalyzeCaptureAudio(&capture_audio);

85 ns.ProcessCaptureAudio(&capture_audio);	71 ns.ProcessCaptureAudio(&capture_audio);

86 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());	72 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());

87 enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(),	73 enh.ProcessRenderAudio(in_buf.channels(),

	74 in_file.sample_rate(),

88 in_file.num_channels());	75 in_file.num_channels());

89 clear_cursor += fragment_size;	76 Interleave(in_buf.channels(),

90 noise_cursor += fragment_size;	77 in_buf.num_frames(),

	78 in_buf.num_channels(),

	79 &in[0]);

	80 FloatToFloatS16(&in[0], in.size(), &in[0]);

	81 out_file.WriteSamples(&in[0], in.size());

91 }	82 }

92

93 FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]);

94

95 WavWriter out_file(FLAGS_out_file,

96 in_file.sample_rate(),

97 in_file.num_channels());

98 out_file.WriteSamples(&in_fpcm[0], samples);

99 }	83 }

100	84

101 } // namespace	85 } // namespace

102 } // namespace webrtc	86 } // namespace webrtc

103	87

104 int main(int argc, char* argv[]) {	88 int main(int argc, char* argv[]) {

105 webrtc::void_main(argc, argv);	89 webrtc::void_main(argc, argv);

106 return 0;	90 return 0;

107 }	91 }

OLD	NEW