webrtc/modules/audio_processing/aec3/aec_state.cc - Issue 2974583004: Transparency improvements in the echo canceller 3

Side by Side Diff: webrtc/modules/audio_processing/aec3/aec_state.cc

Issue 2974583004: Transparency improvements in the echo canceller 3 (Closed)

Patch Set: Corrected wrong echo estimate vector in unittest Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
71 return rtc::Optional<size_t>();	71 return rtc::Optional<size_t>();

72 }	72 }

73	73

74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;	74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;

75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;	75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;

76	76

77 } // namespace	77 } // namespace

78	78

// Counter shared by all AecState objects. The constructor increments it
// atomically and hands the resulting value to its ApmDataDumper.
79 int AecState::instance_count_ = 0;	79 int AecState::instance_count_ = 0;

80	80

// Constructs the state: creates the data dumper with the atomically
// incremented instance count and starts the echo path change counter at
// kEchoPathChangeCounterInitial. On the NEW side the argument is renamed from
// echo_decay to reverb_decay and is stored in reverb_decay_ (previously
// echo_decay_factor_).
81 AecState::AecState(float echo_decay)	81 AecState::AecState(float reverb_decay)

82 : data_dumper_(	82 : data_dumper_(

83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),	83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),

84 echo_path_change_counter_(kEchoPathChangeCounterInitial),	84 echo_path_change_counter_(kEchoPathChangeCounterInitial),

85 echo_decay_factor_(echo_decay) {}	85 reverb_decay_(reverb_decay) {}

86	86

87 AecState::~AecState() = default;	87 AecState::~AecState() = default;

88	88

89 void AecState::HandleEchoPathChange(	89 void AecState::HandleEchoPathChange(

90 const EchoPathVariability& echo_path_variability) {	90 const EchoPathVariability& echo_path_variability) {

91 if (echo_path_variability.AudioPathChanged()) {	91 if (echo_path_variability.AudioPathChanged()) {

92 blocks_since_last_saturation_ = 0;	92 blocks_since_last_saturation_ = 0;

93 usable_linear_estimate_ = false;	93 usable_linear_estimate_ = false;

94 echo_leakage_detected_ = false;	94 echo_leakage_detected_ = false;

95 capture_signal_saturation_ = false;	95 capture_signal_saturation_ = false;

96 echo_saturation_ = false;	96 echo_saturation_ = false;

97 previous_max_sample_ = 0.f;	97 previous_max_sample_ = 0.f;

98	98

99 if (echo_path_variability.delay_change) {	99 if (echo_path_variability.delay_change) {

100 force_zero_gain_counter_ = 0;	100 force_zero_gain_counter_ = 0;

101 blocks_with_filter_adaptation_ = 0;	101 blocks_with_filter_adaptation_ = 0;

102 render_received_ = false;	102 render_received_ = false;

103 force_zero_gain_ = true;	103 force_zero_gain_ = true;

104 echo_path_change_counter_ = kEchoPathChangeCounterMax;	104 echo_path_change_counter_ = kEchoPathChangeCounterMax;

105 }	105 }

106 if (echo_path_variability.gain_change) {	106 if (echo_path_variability.gain_change) {

107 echo_path_change_counter_ = kEchoPathChangeCounterInitial;	107 echo_path_change_counter_ = kEchoPathChangeCounterInitial;

108 }	108 }

109 }	109 }

110 }	110 }

111	111

112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&	112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&

113 adaptive_filter_frequency_response,	113 adaptive_filter_frequency_response,

	114 const std::array<float, kAdaptiveFilterTimeDomainLength>&

	115 adaptive_filter_impulse_response,

114 const rtc::Optional<size_t>& external_delay_samples,	116 const rtc::Optional<size_t>& external_delay_samples,

115 const RenderBuffer& render_buffer,	117 const RenderBuffer& render_buffer,

116 const std::array<float, kFftLengthBy2Plus1>& E2_main,	118 const std::array<float, kFftLengthBy2Plus1>& E2_main,

117 const std::array<float, kFftLengthBy2Plus1>& Y2,	119 const std::array<float, kFftLengthBy2Plus1>& Y2,

118 rtc::ArrayView<const float> x,	120 rtc::ArrayView<const float> x,

	121 const std::array<float, kBlockSize>& s,

119 bool echo_leakage_detected) {	122 bool echo_leakage_detected) {

	123 // Update the echo audibility evaluator.

	124 echo_audibility_.Update(x, s);

	125

120 // Store input parameters.	126 // Store input parameters.

121 echo_leakage_detected_ = echo_leakage_detected;	127 echo_leakage_detected_ = echo_leakage_detected;

122	128

123 // Update counters.	129 // Update counters.

124 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);	130 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);

125 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;	131 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;

126 if (active_render_block) {	132 if (active_render_block) {

127 render_received_ = true;	133 render_received_ = true;

128 }	134 }

129 blocks_with_filter_adaptation_ +=	135 blocks_with_filter_adaptation_ +=

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
172 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&	178 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&

173 filter_delay_ && echo_path_change_counter_ <= 0;	179 filter_delay_ && echo_path_change_counter_ <= 0;

174	180

175 // After an amount of active render samples for which an echo should have been	181 // After an amount of active render samples for which an echo should have been

176 // detected in the capture signal if the ERL was not infinite, flag that a	182 // detected in the capture signal if the ERL was not infinite, flag that a

177 // headset is used.	183 // headset is used.

178 headset_detected_ =	184 headset_detected_ =

179 !external_delay_ && !filter_delay_ &&	185 !external_delay_ && !filter_delay_ &&

180 (!render_received_ \|\|	186 (!render_received_ \|\|

181 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);	187 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);

	188

	189 // Update the room reverb estimate.

	190 UpdateReverb(adaptive_filter_impulse_response);

	191 }

	192

	193 void AecState::UpdateReverb(

	194 const std::array<float, kAdaptiveFilterTimeDomainLength>&

	195 impulse_response) {

	196 if ((!(filter_delay_ && usable_linear_estimate_)) \|\|

	197 (*filter_delay_ > kAdaptiveFilterLength - 4)) {

	198 return;

	199 }

	200

	201 // Form the data to match against by squaring the impulse response

	202 // coefficients.

	203 std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;

	204 std::transform(impulse_response.begin(), impulse_response.end(),

	205 matching_data.begin(), [](float a) { return a * a; });

	206

	207 // Avoid matching against noise in the model by subtracting an estimate of the

	208 // model noise power.

	209 constexpr size_t kTailLength = 64;

	210 constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;

	211 const float tail_power = *std::max_element(matching_data.begin() + tail_index,

	212 matching_data.end());

	213 std::for_each(matching_data.begin(), matching_data.begin() + tail_index,

	214 [tail_power](float& a) { a = std::max(0.f, a - tail_power); });

	215

	216 // Identify the peak index of the impulse response.

	217 const size_t peak_index = *std::max_element(

	218 matching_data.begin(), matching_data.begin() + tail_index);

	219

	220 if (peak_index + 128 < tail_index) {

	221 size_t start_index = peak_index + 64;

	222 // Compute the matching residual error for the current candidate to match.

	223 float residual_sqr_sum = 0.f;

	224 float d_k = reverb_decay_to_test_;

	225 for (size_t k = start_index; k < tail_index; ++k) {

	226 if (matching_data[start_index + 1] == 0.f) {

	227 break;

	228 }

	229

	230 float residual = matching_data[k] - matching_data[peak_index] * d_k;

	231 residual_sqr_sum += residual * residual;

	232 d_k *= reverb_decay_to_test_;

	233 }

	234

	235 // If needed, update the best candidate for the reverb decay.

	236 if (reverb_decay_candidate_residual_ < 0.f \|\|

	237 residual_sqr_sum < reverb_decay_candidate_residual_) {

	238 reverb_decay_candidate_residual_ = residual_sqr_sum;

	239 reverb_decay_candidate_ = reverb_decay_to_test_;

	240 }

	241 }

	242

	243 // Compute the next reverb candidate to evaluate such that all candidates will

	244 // be evaluated within one second.

	245 reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);

	246

	247 // If all reverb candidates have been evaluated, choose the best one as the

	248 // reverb decay.

	249 if (reverb_decay_to_test_ >= 0.9965f) {

	250 if (reverb_decay_candidate_residual_ < 0.f) {

	251 // Transform the decay to be in the unit of blocks.

	252 reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);

	253

	254 // Limit the estimated reverb_decay_ to the maximum one needed in practice

	255 // to minimize the impact of incorrect estimates.

	256 reverb_decay_ = std::min(0.8f, reverb_decay_);

	257 }

	258 reverb_decay_to_test_ = 0.9f;

	259 reverb_decay_candidate_residual_ = -1.f;

	260 }

	261

	262 // For noisy impulse responses, assume a fixed tail length.

	263 if (tail_power > 0.0005f) {

	264 reverb_decay_ = 0.7f;

	265 }

	266 data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);

	267 data_dumper_->DumpRaw("aec3_tail_power", tail_power);

	268 }

	269

	270 void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,

	271 const std::array<float, kBlockSize>& s) {

	272 auto result_x = std::minmax_element(x.begin(), x.end());

	273 auto result_s = std::minmax_element(s.begin(), s.end());

	274 const float x_abs =

	275 std::max(std::abs(result_x.first), std::abs(result_x.second));

	276 const float s_abs =

	277 std::max(std::abs(result_s.first), std::abs(result_s.second));

	278

	279 if (x_abs < 5.f) {

	280 ++low_farend_counter_;

	281 } else {

	282 low_farend_counter_ = 0;

	283 }

	284

	285 // The echo is deemed as not audible if the echo estimate is on the level of

	286 // the quantization noise in the FFTs and the nearend level is sufficiently

	287 // strong to mask that by ensuring that the playout and AGC gains do not boost

	288 // any residual echo that is below the quantization noise level. Furthermore,

	289 // cases where the render signal is very close to zero are also identified as

	290 // not producing audible echo.

	291 inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;

	292 inaudible_echo_ = inaudible_echo_ \|\| low_farend_counter_ > 20;

	293 }

	294

	295 void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {

	296 const float e_max = *std::max_element(e.begin(), e.end());

	297 const float e_min = *std::min_element(e.begin(), e.end());

	298 const float e_abs = std::max(std::abs(e_max), std::abs(e_min));

	299

	300 if (max_nearend_ < e_abs) {

	301 max_nearend_ = e_abs;

	302 max_nearend_counter_ = 0;

	303 } else {

	304 if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) {

	305 max_nearend_ *= 0.995f;

	306 }

	307 }

182 }	308 }

183	309

184 } // namespace webrtc	310 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/aec3/aec_state.h ('k') | webrtc/modules/audio_processing/aec3/aec_state_unittest.cc » ('j') | no next file with comments »