webrtc/modules/audio_processing/aec3/aec_state.cc - Issue 2782423003: Major updates to the echo removal functionality in AEC3

Side by Side Diff: webrtc/modules/audio_processing/aec3/aec_state.cc

Issue 2782423003: Major updates to the echo removal functionality in AEC3 (Closed)

Patch Set: Changes in response to reviewer comments Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc ('K') | « webrtc/modules/audio_processing/aec3/aec_state.h ('k') | webrtc/modules/audio_processing/aec3/aec_state_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/aec3/aec_state.h"	11 #include "webrtc/modules/audio_processing/aec3/aec_state.h"

12	12

13 #include <math.h>	13 #include <math.h>

14 #include <numeric>	14 #include <numeric>

15 #include <vector>	15 #include <vector>

16	16

	17 #include "webrtc/base/array_view.h"

17 #include "webrtc/base/atomicops.h"	18 #include "webrtc/base/atomicops.h"

18 #include "webrtc/base/checks.h"	19 #include "webrtc/base/checks.h"

19 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"	20 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

20	21

21 namespace webrtc {	22 namespace webrtc {

22 namespace {	23 namespace {

23	24

24 constexpr float kMaxFilterEstimateStrength = 1000.f;	25 constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;

	26 constexpr size_t kSaturationLeakageBlocks = 20;

25	27

26 // Compute the delay of the adaptive filter as the partition with a distinct	28 // Computes delay of the adaptive filter.

27 // peak.	29 rtc::Optional<size_t> EstimateFilterDelay(

28 void AnalyzeFilter(

29 const std::vector<std::array<float, kFftLengthBy2Plus1>>&	30 const std::vector<std::array<float, kFftLengthBy2Plus1>>&

30 filter_frequency_response,	31 adaptive_filter_frequency_response) {

31 std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter,	32 const auto& H2 = adaptive_filter_frequency_response;

32 std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength,

33 rtc::Optional<size_t>* filter_delay) {

34 const auto& H2 = filter_frequency_response;

35	33

36 size_t reliable_delays_sum = 0;	34 size_t reliable_delays_sum = 0;

37 size_t num_reliable_delays = 0;	35 size_t num_reliable_delays = 0;

38	36

39 constexpr size_t kUpperBin = kFftLengthBy2 - 5;	37 constexpr size_t kUpperBin = kFftLengthBy2 - 5;

	38 constexpr float kMinPeakMargin = 10.f;

	39 const size_t kTailPartition = H2.size() - 1;

40 for (size_t k = 1; k < kUpperBin; ++k) {	40 for (size_t k = 1; k < kUpperBin; ++k) {

	41 // Find the maximum of H2[j].

41 int peak = 0;	42 int peak = 0;

42 for (size_t j = 0; j < H2.size(); ++j) {	43 for (size_t j = 0; j < H2.size(); ++j) {

43 if (H2[j][k] > H2[peak][k]) {	44 if (H2[j][k] > H2[peak][k]) {

44 peak = j;	45 peak = j;

45 }	46 }

46 }	47 }

47	48

48 if (H2[peak][k] == 0.f) {	49 // Count the peak as a delay only if the peak is sufficiently larger than

49 (*filter_estimate_strength)[k] = 0.f;	50 // the tail.

50 } else if (H2[H2.size() - 1][k] == 0.f) {	51 if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) {

51 (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength;

52 } else {

53 (*filter_estimate_strength)[k] = std::min(

54 kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]);

55 }

56

57 constexpr float kMargin = 10.f;

58 if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) {

59 (*bands_with_reliable_filter)[k] = true;

60 reliable_delays_sum += peak;	52 reliable_delays_sum += peak;

61 ++num_reliable_delays;	53 ++num_reliable_delays;

62 } else {

63 (*bands_with_reliable_filter)[k] = false;

64 }	54 }

65 }	55 }

66 (bands_with_reliable_filter)[0] = (bands_with_reliable_filter)[1];

67 std::fill(bands_with_reliable_filter->begin() + kUpperBin,

68 bands_with_reliable_filter->end(),

69 (*bands_with_reliable_filter)[kUpperBin - 1]);

70 (filter_estimate_strength)[0] = (filter_estimate_strength)[1];

71 std::fill(filter_estimate_strength->begin() + kUpperBin,

72 filter_estimate_strength->end(),

73 (*filter_estimate_strength)[kUpperBin - 1]);

74	56

75 *filter_delay =	57 // Return no delay if not sufficient delays have been found.

76 num_reliable_delays > 20	58 if (num_reliable_delays < 21) {

77 ? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays)	59 return rtc::Optional<size_t>();

78 : rtc::Optional<size_t>();	60 }

	61

	62 const size_t delay = reliable_delays_sum / num_reliable_delays;

	63 // Sanity check that the peak is not caused by a false strong DC-component in

	64 // the filter.

	65 for (size_t k = 1; k < kUpperBin; ++k) {

	66 if (H2[delay][k] > H2[delay][0]) {

	67 RTC_DCHECK_GT(H2.size(), delay);

	68 return rtc::Optional<size_t>(delay);

	69 }

	70 }

	71 return rtc::Optional<size_t>();

79 }	72 }

80	73

81 constexpr int kActiveRenderCounterInitial = 50;	74 constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;

82 constexpr int kActiveRenderCounterMax = 200;	75 constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond;

83 constexpr int kEchoPathChangeCounterInitial = 50;

84 constexpr int kEchoPathChangeCounterMax = 3 * 250;

85	76

86 } // namespace	77 } // namespace

87	78

88 int AecState::instance_count_ = 0;	79 int AecState::instance_count_ = 0;

89	80

90 AecState::AecState()	81 AecState::AecState()

91 : data_dumper_(	82 : data_dumper_(

92 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),	83 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),

93 echo_path_change_counter_(kEchoPathChangeCounterInitial),	84 echo_path_change_counter_(kEchoPathChangeCounterInitial) {}

94 active_render_counter_(kActiveRenderCounterInitial) {

95 bands_with_reliable_filter_.fill(false);

96 filter_estimate_strength_.fill(0.f);

97 }

98	85

99 AecState::~AecState() = default;	86 AecState::~AecState() = default;

100	87

	88 void AecState::HandleEchoPathChange(

	89 const EchoPathVariability& echo_path_variability) {

	90 if (echo_path_variability.AudioPathChanged()) {

	91 blocks_since_last_saturation_ = 0;

	92 active_render_blocks_ = 0;

	93 echo_path_change_counter_ = kEchoPathChangeCounterMax;

	94 usable_linear_estimate_ = false;

	95 echo_leakage_detected_ = false;

	96 capture_signal_saturation_ = false;

	97 echo_saturation_ = false;

	98 headset_detected_ = false;

	99 previous_max_sample_ = 0.f;

	100 }

	101 }

	102

101 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&	103 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&

102 filter_frequency_response,	104 adaptive_filter_frequency_response,

103 const rtc::Optional<size_t>& external_delay_samples,	105 const rtc::Optional<size_t>& external_delay_samples,

104 const RenderBuffer& X_buffer,	106 const RenderBuffer& render_buffer,

105 const std::array<float, kFftLengthBy2Plus1>& E2_main,	107 const std::array<float, kFftLengthBy2Plus1>& E2_main,

106 const std::array<float, kFftLengthBy2Plus1>& E2_shadow,

107 const std::array<float, kFftLengthBy2Plus1>& Y2,	108 const std::array<float, kFftLengthBy2Plus1>& Y2,

108 rtc::ArrayView<const float> x,	109 rtc::ArrayView<const float> x,

109 const EchoPathVariability& echo_path_variability,

110 bool echo_leakage_detected) {	110 bool echo_leakage_detected) {

111 filter_length_ = filter_frequency_response.size();	111 // Store input parameters.

112 AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_,	112 echo_leakage_detected_ = echo_leakage_detected;

113 &filter_estimate_strength_, &filter_delay_);	113

114 // Compute the externally provided delay in partitions. The truncation is	114 // Update counters.

115 // intended here.	115 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);

	116 const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;

	117 active_render_blocks_ += active_render_block ? 1 : 0;

	118 --echo_path_change_counter_;

	119

	120 // Estimate delays.

	121 filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);

116 external_delay_ =	122 external_delay_ =

117 external_delay_samples	123 external_delay_samples

118 ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)	124 ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)

119 : rtc::Optional<size_t>();	125 : rtc::Optional<size_t>();

120	126

121 const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);	127 // Update the ERL and ERLE measures.

122	128 if (filter_delay_ && echo_path_change_counter_ <= 0) {

123 active_render_blocks_ =	129 const auto& X2 = render_buffer.Spectrum(*filter_delay_);

124 echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1;

125

126 echo_path_change_counter_ = echo_path_variability.AudioPathChanged()

127 ? kEchoPathChangeCounterMax

128 : echo_path_change_counter_ - 1;

129 active_render_counter_ = x_energy > 10000.f * kFftLengthBy2

130 ? kActiveRenderCounterMax

131 : active_render_counter_ - 1;

132

133 usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0;

134

135 echo_leakage_detected_ = echo_leakage_detected;

136

137 model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_;

138

139 if (usable_linear_estimate_) {

140 const auto& X2 = X_buffer.Spectrum(*filter_delay_);

141

142 // TODO(peah): Expose these as stats.

143 erle_estimator_.Update(X2, Y2, E2_main);	130 erle_estimator_.Update(X2, Y2, E2_main);

144 erl_estimator_.Update(X2, Y2);	131 erl_estimator_.Update(X2, Y2);

	132 }

145	133

146 // TODO(peah): Add working functionality for headset detection. Until the	134 // Detect and flag echo saturation.

147 // functionality for that is working the headset detector is hardcoded to detect	135 RTC_DCHECK_GT(0, x.size());
	ivoc 2017/04/06 09:09:53
	Shouldn't this be RTC_DCHECK_GT(x.size(), 0)?

	peah-webrtc 2017/04/06 09:25:31
	On 2017/04/06 09:09:53, ivoc wrote:
	> Shouldn't this be RTC_DCHECK_GT(x.size(), 0)?

	Of course, thanks!!!
	I changed it to RTC_DCHECK_LT(0, x.size()); to get the error report correct.

	Done.

	hlundin-webrtc 2017/04/06 09:50:48
	On 2017/04/06 09:25:31, peah-webrtc wrote:
	> On 2017/04/06 09:09:53, ivoc wrote:
	> > Shouldn't this be RTC_DCHECK_GT(x.size(), 0)?
	>
	> Of course, thanks!!!
	> I changed it to RTC_DCHECK_LT(0, x.size()); to get the error report correct.
	>
	> Done.

	For RTC_(D)CHECK_* the order does not matter. You get printouts like
	# Check failed: 17 <= 4 (17 vs. 4)
	or
	# Check failed: 17 == 4 (17 vs. 4)
	which don't label any of the arguments as "expected" or "actual".

	For gtest, the order matters sometimes. In particular for EXPECT_EQ and
	EXPECT_NE, which print out stuff like
	Value of: 4
	Expected: 17

	However, gtest's inequality tests don't care about ordering. For instance,
	EXPECT_LE prints out
	Expected: (17) <= (4), actual: 17 vs 4

	peah-webrtc 2017/04/06 10:45:54
	On 2017/04/06 09:50:48, hlundin-webrtc wrote:
	> On 2017/04/06 09:25:31, peah-webrtc wrote:
	> > On 2017/04/06 09:09:53, ivoc wrote:
	> > > Shouldn't this be RTC_DCHECK_GT(x.size(), 0)?
	> >
	> > Of course, thanks!!!
	> > I changed it to RTC_DCHECK_LT(0, x.size()); to get the error report correct.
	> >
	> > Done.
	>
	> For RTC_(D)CHECK_* the order does not matter. You get printouts like
	> # Check failed: 17 <= 4 (17 vs. 4)
	> or
	> # Check failed: 17 == 4 (17 vs. 4)
	> which don't label any of the arguments as "expected" or "actual".
	>
	> For gtest, the order matters sometimes. In particular for EXPECT_EQ and
	> EXPECT_NE, which print out stuff like
	> Value of: 4
	> Expected: 17
	>
	> However, gtest's inequality tests don't care about ordering. For instance,
	> EXPECT_LE prints out
	> Expected: (17) <= (4), actual: 17 vs 4

	Ah, that I did not know, I thought the order convention was due to the output
	formatting. But am I right in my impression that we always strive to put the
	thing compared against as the first argument, and the thing that is compared as
	the second argument?

	hlundin-webrtc 2017/04/06 11:13:26
	On 2017/04/06 10:45:54, peah-webrtc wrote:
	> On 2017/04/06 09:50:48, hlundin-webrtc wrote:
	> > On 2017/04/06 09:25:31, peah-webrtc wrote:
	> > > On 2017/04/06 09:09:53, ivoc wrote:
	> > > > Shouldn't this be RTC_DCHECK_GT(x.size(), 0)?
	> > >
	> > > Of course, thanks!!!
	> > > I changed it to RTC_DCHECK_LT(0, x.size()); to get the error report correct.
	> > >
	> > > Done.
	> >
	> > For RTC_(D)CHECK_* the order does not matter. You get printouts like
	> > # Check failed: 17 <= 4 (17 vs. 4)
	> > or
	> > # Check failed: 17 == 4 (17 vs. 4)
	> > which don't label any of the arguments as "expected" or "actual".
	> >
	> > For gtest, the order matters sometimes. In particular for EXPECT_EQ and
	> > EXPECT_NE, which print out stuff like
	> > Value of: 4
	> > Expected: 17
	> >
	> > However, gtest's inequality tests don't care about ordering. For instance,
	> > EXPECT_LE prints out
	> > Expected: (17) <= (4), actual: 17 vs 4
	>
	> Ah, that I did not know, I thought the order convention was due to the output
	> formatting. But am I right in my impression that we always strive to put the
	> thing compared against as the first argument, and the thing that is compared as
	> the second argument?

	The order convention is due to output formatting, but it only applies where the
	output format actually makes a difference between the expected and actual value.
	None of the (D)CHECKs does that, and from the gtest suite, only _EQ and _NE tests do that.
148 // no headset.	136 const float max_sample = fabs(*std::max_element(

149 #if 0	137 x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));

150 const auto& erl = erl_estimator_.Erl();	138 const bool saturated_echo =

151 const int low_erl_band_count = std::count_if(	139 previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();

152 erl.begin(), erl.end(), [](float a) { return a <= 0.1f; });	140 previous_max_sample_ = max_sample;

153	141

154 const int noisy_band_count = std::count_if(	142 // Counts the blocks since saturation.

155 filter_estimate_strength_.begin(), filter_estimate_strength_.end(),	143 blocks_since_last_saturation_ =

156 [](float a) { return a <= 10.f; });	144 saturated_echo ? 0 : blocks_since_last_saturation_ + 1;

157 headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20;	145 echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;

158 #endif	146

159 headset_detected_ = false;	147 // Flag whether the linear filter estimate is usable.

160 } else {	148 usable_linear_estimate_ =

161 headset_detected_ = false;	149 (!echo_saturation_) &&

162 }	150 active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&

	151 filter_delay_ && echo_path_change_counter_ <= 0;

	152

	153 // After an amount of active render samples for which an echo should have been

	154 // detected in the capture signal if the ERL was not infinite, flag that a

	155 // headset is used.

	156 headset_detected_ = !external_delay_ && !filter_delay_ &&

	157 active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;

163 }	158 }

164	159

165 } // namespace webrtc	160 } // namespace webrtc

OLD	NEW