webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1685703004: Fix and simplify the power estimation in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@ie

Patch Set: Address turajs comments Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 //

12 // Specifies core class for intelligibility enhancement.

13 //

14

15 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

16 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

17	13

18 #include <complex>	14 #include <complex>

19 #include <vector>	15 #include <vector>

20	16

21 #include "webrtc/base/scoped_ptr.h"	17 #include "webrtc/base/scoped_ptr.h"

22 #include "webrtc/common_audio/lapped_transform.h"	18 #include "webrtc/common_audio/lapped_transform.h"

23 #include "webrtc/common_audio/channel_buffer.h"	19 #include "webrtc/common_audio/channel_buffer.h"

24 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	20 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

25	21

26 namespace webrtc {	22 namespace webrtc {

27	23

28 // Speech intelligibility enhancement module. Reads render and capture	24 // Speech intelligibility enhancement module. Reads render and capture

29 // audio streams and modifies the render stream with a set of gains per	25 // audio streams and modifies the render stream with a set of gains per

30 // frequency bin to enhance speech against the noise background.	26 // frequency bin to enhance speech against the noise background.

31 // Note: assumes speech and noise streams are already separated.	27 // Details of the model and algorithm can be found in the original paper:

	28 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788

32 class IntelligibilityEnhancer {	29 class IntelligibilityEnhancer {

33 public:	30 public:

34 struct Config {	31 struct Config {

35 // |var_*| are parameters for the VarianceArray constructor for the	32 // TODO(bercic): the |decay_rate|, |analysis_rate| and |gain_limit|

36 // clear speech stream.	33 // parameters should probably go away once fine tuning is done.

37 // TODO(bercic): the |var_|, |_rate| and |gain_limit| parameters should

38 // probably go away once fine tuning is done.

39 Config()	34 Config()

40 : sample_rate_hz(16000),	35 : sample_rate_hz(16000),

41 num_capture_channels(1),	36 num_capture_channels(1),

42 num_render_channels(1),	37 num_render_channels(1),

43 var_type(intelligibility::VarianceArray::kStepDecaying),	38 decay_rate(0.9f),

44 var_decay_rate(0.9f),	39 analysis_rate(60),

45 var_window_size(10),

46 analysis_rate(800),

47 gain_change_limit(0.1f),	40 gain_change_limit(0.1f),

48 rho(0.02f) {}	41 rho(0.02f) {}

49 int sample_rate_hz;	42 int sample_rate_hz;

50 size_t num_capture_channels;	43 size_t num_capture_channels;

51 size_t num_render_channels;	44 size_t num_render_channels;

52 intelligibility::VarianceArray::StepType var_type;	45 float decay_rate;

53 float var_decay_rate;

54 size_t var_window_size;

55 int analysis_rate;	46 int analysis_rate;

56 float gain_change_limit;	47 float gain_change_limit;

57 float rho;	48 float rho;

58 };	49 };

59	50

60 explicit IntelligibilityEnhancer(const Config& config);	51 explicit IntelligibilityEnhancer(const Config& config);

61 IntelligibilityEnhancer(); // Initialize with default config.	52 IntelligibilityEnhancer(); // Initialize with default config.

62	53

63 // Sets the capture noise magnitude spectrum estimate.	54 // Sets the capture noise magnitude spectrum estimate.

64 void SetCaptureNoiseEstimate(std::vector<float> noise);	55 void SetCaptureNoiseEstimate(std::vector<float> noise);

(...skipping 18 matching lines...) Expand all Loading...
83 size_t out_channels,	74 size_t out_channels,

84 std::complex<float>* const* out_block) override;	75 std::complex<float>* const* out_block) override;

85	76

86 private:	77 private:

87 IntelligibilityEnhancer* parent_;	78 IntelligibilityEnhancer* parent_;

88 };	79 };

89 friend class TransformCallback;	80 friend class TransformCallback;

90 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);	81 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);

91 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);	82 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

92	83

93 // Updates variance computation and analysis with |in_block_|,	84 // Updates power computation and analysis with |in_block_|,

94 // and writes modified speech to |out_block|.	85 // and writes modified speech to |out_block|.

95 void ProcessClearBlock(const std::complex<float>* in_block,	86 void ProcessClearBlock(const std::complex<float>* in_block,

96 std::complex<float>* out_block);	87 std::complex<float>* out_block);

97	88

98 // Computes and sets modified gains.	89 // Computes and sets modified gains.

99 void AnalyzeClearBlock(float power_target);	90 void AnalyzeClearBlock();

100	91

101 // Bisection search for optimal |lambda|.	92 // Bisection search for optimal |lambda|.

102 void SolveForLambda(float power_target, float power_bot, float power_top);	93 void SolveForLambda(float power_target, float power_bot, float power_top);

103	94

104 // Transforms freq gains to ERB gains.	95 // Transforms freq gains to ERB gains.

105 void UpdateErbGains();	96 void UpdateErbGains();

106	97

107 // Returns number of ERB filters.	98 // Returns number of ERB filters.

108 static size_t GetBankSize(int sample_rate, size_t erb_resolution);	99 static size_t GetBankSize(int sample_rate, size_t erb_resolution);

109	100

(...skipping 10 matching lines...) Expand all Loading...
120 const size_t bank_size_; // Num ERB filters.	111 const size_t bank_size_; // Num ERB filters.

121 const int sample_rate_hz_;	112 const int sample_rate_hz_;

122 const int erb_resolution_;	113 const int erb_resolution_;

123 const size_t num_capture_channels_;	114 const size_t num_capture_channels_;

124 const size_t num_render_channels_;	115 const size_t num_render_channels_;

125 const int analysis_rate_; // Num blocks before gains recalculated.	116 const int analysis_rate_; // Num blocks before gains recalculated.

126	117

127 const bool active_; // Whether render gains are being updated.	118 const bool active_; // Whether render gains are being updated.

128 // TODO(ekm): Add logic for updating |active_|.	119 // TODO(ekm): Add logic for updating |active_|.

129	120

130 intelligibility::VarianceArray clear_variance_;	121 PowerEstimator clear_power_;

131 std::vector<float> noise_power_;	122 std::vector<float> noise_power_;

132 rtc::scoped_ptr<float[]> filtered_clear_var_;	123 rtc::scoped_ptr<float[]> filtered_clear_pow_;

133 rtc::scoped_ptr<float[]> filtered_noise_var_;	124 rtc::scoped_ptr<float[]> filtered_noise_pow_;

134 rtc::scoped_ptr<float[]> center_freqs_;	125 rtc::scoped_ptr<float[]> center_freqs_;

135 std::vector<std::vector<float>> capture_filter_bank_;	126 std::vector<std::vector<float>> capture_filter_bank_;

136 std::vector<std::vector<float>> render_filter_bank_;	127 std::vector<std::vector<float>> render_filter_bank_;

137 size_t start_freq_;	128 size_t start_freq_;

138 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.	129 rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.

139 // for each ERB band.	130 // for each ERB band.

140 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.	131 rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.

141 intelligibility::GainApplier gain_applier_;	132 GainApplier gain_applier_;

142	133

143 // Destination buffers used to reassemble blocked chunks before overwriting	134 // Destination buffers used to reassemble blocked chunks before overwriting

144 // the original input array with modifications.	135 // the original input array with modifications.

145 ChannelBuffer<float> temp_render_out_buffer_;	136 ChannelBuffer<float> temp_render_out_buffer_;

146	137

147 rtc::scoped_ptr<float[]> kbd_window_;	138 rtc::scoped_ptr<float[]> kbd_window_;

148 TransformCallback render_callback_;	139 TransformCallback render_callback_;

149 rtc::scoped_ptr<LappedTransform> render_mangler_;	140 rtc::scoped_ptr<LappedTransform> render_mangler_;

150 int block_count_;	141 int block_count_;

151 int analysis_step_;	142 int analysis_step_;

152 };	143 };

153	144

154 } // namespace webrtc	145 } // namespace webrtc

155	146

156 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	147 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW