webrtc/modules/audio_processing/aec3/aec_state.cc - Issue 2974583004: Transparency improvements in the echo canceller 3

Side by Side Diff: webrtc/modules/audio_processing/aec3/aec_state.cc

Issue 2974583004: Transparency improvements in the echo canceller 3 (Closed)

Patch Set: Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc ('K') | « webrtc/modules/audio_processing/aec3/aec_state.h ('k') | webrtc/modules/audio_processing/aec3/aec_state_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
71 return rtc::Optional<size_t>();	71 return rtc::Optional<size_t>();

72 }	72 }

73	73

74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;	74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;

75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;	75 constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;

76	76

77 } // namespace	77 } // namespace

78	78

79 int AecState::instance_count_ = 0;	79 int AecState::instance_count_ = 0;

80	80

81 AecState::AecState(float echo_decay)	81 AecState::AecState(float reverb_decay)

82 : data_dumper_(	82 : data_dumper_(

83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),	83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),

84 echo_path_change_counter_(kEchoPathChangeCounterInitial),	84 echo_path_change_counter_(kEchoPathChangeCounterInitial),

85 echo_decay_factor_(echo_decay) {}	85 reverb_decay_(reverb_decay) {}

86	86

87 AecState::~AecState() = default;	87 AecState::~AecState() = default;

88	88

89 void AecState::HandleEchoPathChange(	89 void AecState::HandleEchoPathChange(

90 const EchoPathVariability& echo_path_variability) {	90 const EchoPathVariability& echo_path_variability) {

91 if (echo_path_variability.AudioPathChanged()) {	91 if (echo_path_variability.AudioPathChanged()) {

92 blocks_since_last_saturation_ = 0;	92 blocks_since_last_saturation_ = 0;

93 usable_linear_estimate_ = false;	93 usable_linear_estimate_ = false;

94 echo_leakage_detected_ = false;	94 echo_leakage_detected_ = false;

95 capture_signal_saturation_ = false;	95 capture_signal_saturation_ = false;

96 echo_saturation_ = false;	96 echo_saturation_ = false;

97 previous_max_sample_ = 0.f;	97 previous_max_sample_ = 0.f;

98	98

99 if (echo_path_variability.delay_change) {	99 if (echo_path_variability.delay_change) {

100 force_zero_gain_counter_ = 0;	100 force_zero_gain_counter_ = 0;

101 blocks_with_filter_adaptation_ = 0;	101 blocks_with_filter_adaptation_ = 0;

102 render_received_ = false;	102 render_received_ = false;

103 force_zero_gain_ = true;	103 force_zero_gain_ = true;

104 echo_path_change_counter_ = kEchoPathChangeCounterMax;	104 echo_path_change_counter_ = kEchoPathChangeCounterMax;

105 }	105 }

106 if (echo_path_variability.gain_change) {	106 if (echo_path_variability.gain_change) {

107 echo_path_change_counter_ = kEchoPathChangeCounterInitial;	107 echo_path_change_counter_ = kEchoPathChangeCounterInitial;

108 }	108 }

109 }	109 }

110 }	110 }

111	111

112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&	112 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&

113 adaptive_filter_frequency_response,	113 adaptive_filter_frequency_response,

	114 const std::array<float, kAdaptiveFilterTimeDomainLength>&

	115 adaptive_filter_impulse_response,

114 const rtc::Optional<size_t>& external_delay_samples,	116 const rtc::Optional<size_t>& external_delay_samples,

115 const RenderBuffer& render_buffer,	117 const RenderBuffer& render_buffer,

116 const std::array<float, kFftLengthBy2Plus1>& E2_main,	118 const std::array<float, kFftLengthBy2Plus1>& E2_main,

117 const std::array<float, kFftLengthBy2Plus1>& Y2,	119 const std::array<float, kFftLengthBy2Plus1>& Y2,

118 rtc::ArrayView<const float> x,	120 rtc::ArrayView<const float> x,

	121 const std::array<float, kBlockSize>& s,

119 bool echo_leakage_detected) {	122 bool echo_leakage_detected) {

	123 // Update the echo audibility evaluator.

	124 echo_audibility_.Update(x, s);

	125

120 // Store input parameters.	126 // Store input parameters.

121 echo_leakage_detected_ = echo_leakage_detected;	127 echo_leakage_detected_ = echo_leakage_detected;

122	128

123 // Update counters.	129 // Update counters.

124 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);	130 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);

125 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;	131 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;

126 if (active_render_block) {	132 if (active_render_block) {

127 render_received_ = true;	133 render_received_ = true;

128 }	134 }

129 blocks_with_filter_adaptation_ +=	135 blocks_with_filter_adaptation_ +=

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
172 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&	178 blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&

173 filter_delay_ && echo_path_change_counter_ <= 0;	179 filter_delay_ && echo_path_change_counter_ <= 0;

174	180

175 // After an amount of active render samples for which an echo should have been	181 // After an amount of active render samples for which an echo should have been

176 // detected in the capture signal if the ERL was not infinite, flag that a	182 // detected in the capture signal if the ERL was not infinite, flag that a

177 // headset is used.	183 // headset is used.

178 headset_detected_ =	184 headset_detected_ =

179 !external_delay_ && !filter_delay_ &&	185 !external_delay_ && !filter_delay_ &&

180 (!render_received_ \|\|	186 (!render_received_ \|\|

181 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);	187 blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);

	188

	189 // Update the room reverb estimate.

	190 UpdateReverb(adaptive_filter_impulse_response);

	191 }

	192

	193 void AecState::UpdateReverb(

	194 const std::array<float, kAdaptiveFilterTimeDomainLength>&

	195 impulse_response) {

	196 if ((!(filter_delay_ && usable_linear_estimate_)) \|\|

	197 (*filter_delay_ > kAdaptiveFilterLength - 4)) {

	198 return;

	199 }

	200

	201 // Form the data to match against by squaring the impulse response

	202 // coefficients.

	203 std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;

	204 std::transform(impulse_response.begin(), impulse_response.end(),

	205 matching_data.begin(), [](float a) { return a * a; });

	206

	207 // Avoid matching against noise in the model by subtracting an estimate of the

	208 // model noise power.

	209 constexpr size_t kTailLength = 64;

	210 constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;

	211 const float tail_power = *std::max_element(matching_data.begin() + tail_index,

	212 matching_data.end());

	213 std::for_each(matching_data.begin(), matching_data.begin() + tail_index,

	214 [tail_power](float& a) { a = std::max(0.f, a - tail_power); });

	215

	216 // Identify the peak index of the impulse response.

	217 size_t peak_index = 0;

	218 float peak_value = matching_data[0];

	219 for (size_t k = 1; k < tail_index; ++k) {
	ivoc 2017/07/10 13:53:55 You could use std::max_element instead of this loop. peah-webrtc 2017/07/10 23:18:09 On 2017/07/10 13:53:55, ivoc wrote: > You could use std::max_element instead of this loop. Good point! Done.
	220 if (matching_data[k] > peak_value) {

	221 peak_value = matching_data[k];

	222 peak_index = k;

	223 }

	224 }

	225

	226 if (peak_index + 128 < tail_index) {

	227 size_t start_index = peak_index + 64;

	228 // Compute the matching residual error for the current candidate to match.

	229 float residual_sqr_sum = 0.f;

	230 float d_k = reverb_decay_to_test_;

	231 for (size_t k = start_index; k < tail_index; ++k) {

	232 if (matching_data[start_index + 1] == 0.f) {

	233 break;

	234 }

	235

	236 float residual = matching_data[k] - matching_data[peak_index] * d_k;

	237 residual_sqr_sum += residual * residual;

	238 d_k *= reverb_decay_to_test_;

	239 }

	240

	241 // If needed, update the best candidate for the reverb decay.

	242 if (reverb_decay_candidate_residual_ < 0.f \|\|

	243 residual_sqr_sum < reverb_decay_candidate_residual_) {

	244 reverb_decay_candidate_residual_ = residual_sqr_sum;

	245 reverb_decay_candidate_ = reverb_decay_to_test_;

	246 }

	247 }

	248

	249 // Compute the next reverb candidate to evaluate such that all candidates will

	250 // be evaluated within one second.

	251 reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);

	252

	253 // If all reverb candidates have been evaluated, choose the best one as the

	254 // reverb decay.

	255 if (reverb_decay_to_test_ >= 0.9965f) {

	256 reverb_decay_to_test_ = 0.9f;

	257 reverb_decay_candidate_residual_ = -1.f;

	258

	259 if (reverb_decay_candidate_residual_ < 0.f) {
	ivoc 2017/07/10 13:53:55 This is always true (since reverb_decay_candidate_residual_ is set to -1 on the previous line) peah-webrtc 2017/07/10 23:18:09 On 2017/07/10 13:53:55, ivoc wrote: > This is always true (since reverb_decay_candidate_residual_ is set to -1 on the previous line) Good find! Done.
	260 // Transform the decay to be in the unit of blocks.

	261 reverb_decay_ = 1.f;

	262 for (size_t k = 0; k < kFftLengthBy2; ++k) {

	263 reverb_decay_ *= reverb_decay_candidate_;
	ivoc 2017/07/10 13:53:55 Is this for-loop more efficient than using pow? peah-webrtc 2017/07/10 23:18:09 On 2017/07/10 13:53:55, ivoc wrote: > Is this for-loop more efficient than using pow? That seems a bit unclear. However, this is only done once per frame, so I think for code clarity, it makes sense to go for pow. Done.
	264 }

	265

	266 // Limit the estimated reverb_decay_ to the maximum one needed in practice

	267 // to minimize the impact of incorrect estimates.

	268 reverb_decay_ = std::min(0.8f, reverb_decay_);

	269 }

	270 }

	271

	272 // For noisy impulse responses, assume a fixed tail length.

	273 if (tail_power > 0.0005f) {

	274 reverb_decay_ = 0.7f;

	275 }

	276 data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);

	277 data_dumper_->DumpRaw("aec3_tail_power", tail_power);

	278 }

	279

	280 void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,

	281 const std::array<float, kBlockSize>& s) {

	282 const float x_max = *std::max_element(x.begin(), x.end());

	283 const float x_min = *std::min_element(x.begin(), x.end());
	ivoc 2017/07/10 13:53:55 There is a std::minmax function that calculates both the max and min in a single pass. I'm not sure if we can use it (it's a C++11 library), but it would be more efficient. peah-webrtc 2017/07/10 23:18:09 On 2017/07/10 13:53:55, ivoc wrote: > There is a std::minmax function that calculates both the max and min in a single pass. I'm not sure if we can use it (it's a C++11 library), but it would be more efficient. Ah! Great! I did not know about that one. Afaics, we are allowed to use that, so I change to using that. Done.
	284 const float s_max = *std::max_element(s.begin(), s.end());

	285 const float s_min = *std::min_element(s.begin(), s.end());

	286 const float s_abs = std::max(std::abs(s_max), std::abs(s_min));

	287 const float x_abs = std::max(std::abs(x_max), std::abs(x_min));

	288

	289 if (x_abs < 5.f) {

	290 ++low_farend_counter_;

	291 } else {

	292 low_farend_counter_ = 0;

	293 }

	294

	295 // The echo is deemed as not audible if the echo estimate is on the level of

	296 // the quantization noise in the FFTs and the nearend level is sufficiently

	297 // strong to mask that by ensuring that the playout and AGC gains do not boost

	298 // any residual echo that is below the quantization noise level. Furthermore,

	299 // cases where the render signal is very close to zero are also identified as

	300 // not producing audible echo.

	301 inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;

	302 inaudible_echo_ = inaudible_echo_ \|\| low_farend_counter_ > 20;

	303 }

	304

	305 void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {

	306 const float e_max = *std::max_element(e.begin(), e.end());

	307 const float e_min = *std::min_element(e.begin(), e.end());

	308 const float e_abs = std::max(std::abs(e_max), std::abs(e_min));

	309

	310 if (max_nearend_ < e_abs) {

	311 max_nearend_ = e_abs;

	312 max_nearend_counter_ = 0;

	313 } else {

	314 if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) {

	315 max_nearend_ *= 0.995f;

	316 }

	317 }

182 }	318 }

183	319

184 } // namespace webrtc	320 } // namespace webrtc

OLD	NEW