webrtc/modules/audio_processing/aec3/echo_remover.cc - Issue 2782423003: Major updates to the echo removal functionality in AEC3

Side by Side Diff: webrtc/modules/audio_processing/aec3/echo_remover.cc

Issue 2782423003: Major updates to the echo removal functionality in AEC3 (Closed)

Patch Set: Changes in response to reviewer comments Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« webrtc/modules/audio_processing/aec3/aec_state.cc ('K') | « webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc ('k') | webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10 #include "webrtc/modules/audio_processing/aec3/echo_remover.h"	10 #include "webrtc/modules/audio_processing/aec3/echo_remover.h"

11	11

	12 #include <math.h>

12 #include <algorithm>	13 #include <algorithm>

13 #include <memory>	14 #include <memory>

14 #include <numeric>	15 #include <numeric>

15 #include <string>	16 #include <string>

16	17

17 #include "webrtc/base/array_view.h"	18 #include "webrtc/base/array_view.h"

18 #include "webrtc/base/atomicops.h"	19 #include "webrtc/base/atomicops.h"

19 #include "webrtc/base/constructormagic.h"	20 #include "webrtc/base/constructormagic.h"

20 #include "webrtc/modules/audio_processing/aec3/aec3_common.h"	21 #include "webrtc/modules/audio_processing/aec3/aec3_common.h"

21 #include "webrtc/modules/audio_processing/aec3/aec_state.h"	22 #include "webrtc/modules/audio_processing/aec3/aec_state.h"

22 #include "webrtc/modules/audio_processing/aec3/comfort_noise_generator.h"	23 #include "webrtc/modules/audio_processing/aec3/comfort_noise_generator.h"

23 #include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"	24 #include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"

24 #include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h"	25 #include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h"

25 #include "webrtc/modules/audio_processing/aec3/fft_data.h"	26 #include "webrtc/modules/audio_processing/aec3/fft_data.h"

26 #include "webrtc/modules/audio_processing/aec3/output_selector.h"	27 #include "webrtc/modules/audio_processing/aec3/output_selector.h"

27 #include "webrtc/modules/audio_processing/aec3/power_echo_model.h"

28 #include "webrtc/modules/audio_processing/aec3/render_buffer.h"	28 #include "webrtc/modules/audio_processing/aec3/render_buffer.h"

29 #include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h"	29 #include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h"

30 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"	30 #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"

31 #include "webrtc/modules/audio_processing/aec3/subtractor.h"	31 #include "webrtc/modules/audio_processing/aec3/subtractor.h"

32 #include "webrtc/modules/audio_processing/aec3/suppression_filter.h"	32 #include "webrtc/modules/audio_processing/aec3/suppression_filter.h"

33 #include "webrtc/modules/audio_processing/aec3/suppression_gain.h"	33 #include "webrtc/modules/audio_processing/aec3/suppression_gain.h"

34 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"	34 #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"

35	35

36 namespace webrtc {	36 namespace webrtc {

37	37

38 namespace {	38 namespace {

39	39

40 void LinearEchoPower(const FftData& E,	40 void LinearEchoPower(const FftData& E,

41 const FftData& Y,	41 const FftData& Y,

42 std::array<float, kFftLengthBy2Plus1>* S2) {	42 std::array<float, kFftLengthBy2Plus1>* S2) {

43 for (size_t k = 0; k < E.re.size(); ++k) {	43 for (size_t k = 0; k < E.re.size(); ++k) {

44 (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +	44 (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +

45 (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);	45 (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);

46 }	46 }

47 }	47 }

48	48

49 float BlockPower(const std::array<float, kBlockSize> x) {

50 return std::accumulate(x.begin(), x.end(), 0.f,

51 [](float a, float b) -> float { return a + b * b; });

52 }

53

54 // Class for removing the echo from the capture signal.	49 // Class for removing the echo from the capture signal.

55 class EchoRemoverImpl final : public EchoRemover {	50 class EchoRemoverImpl final : public EchoRemover {

56 public:	51 public:

57 explicit EchoRemoverImpl(int sample_rate_hz);	52 explicit EchoRemoverImpl(int sample_rate_hz);

58 ~EchoRemoverImpl() override;	53 ~EchoRemoverImpl() override;

59	54

60 // Removes the echo from a block of samples from the capture signal. The	55 // Removes the echo from a block of samples from the capture signal. The

61 // supplied render signal is assumed to be pre-aligned with the capture	56 // supplied render signal is assumed to be pre-aligned with the capture

62 // signal.	57 // signal.

63 void ProcessCapture(	58 void ProcessCapture(

(...skipping 12 matching lines...) Expand all Loading...
76 private:	71 private:

77 static int instance_count_;	72 static int instance_count_;

78 const Aec3Fft fft_;	73 const Aec3Fft fft_;

79 std::unique_ptr<ApmDataDumper> data_dumper_;	74 std::unique_ptr<ApmDataDumper> data_dumper_;

80 const Aec3Optimization optimization_;	75 const Aec3Optimization optimization_;

81 const int sample_rate_hz_;	76 const int sample_rate_hz_;

82 Subtractor subtractor_;	77 Subtractor subtractor_;

83 SuppressionGain suppression_gain_;	78 SuppressionGain suppression_gain_;

84 ComfortNoiseGenerator cng_;	79 ComfortNoiseGenerator cng_;

85 SuppressionFilter suppression_filter_;	80 SuppressionFilter suppression_filter_;

86 PowerEchoModel power_echo_model_;

87 RenderBuffer X_buffer_;

88 RenderSignalAnalyzer render_signal_analyzer_;	81 RenderSignalAnalyzer render_signal_analyzer_;

89 OutputSelector output_selector_;	82 OutputSelector output_selector_;

90 ResidualEchoEstimator residual_echo_estimator_;	83 ResidualEchoEstimator residual_echo_estimator_;

91 bool echo_leakage_detected_ = false;	84 bool echo_leakage_detected_ = false;

92 AecState aec_state_;	85 AecState aec_state_;

93 EchoRemoverMetrics metrics_;	86 EchoRemoverMetrics metrics_;

94	87

95 RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl);	88 RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl);

96 };	89 };

97	90

98 int EchoRemoverImpl::instance_count_ = 0;	91 int EchoRemoverImpl::instance_count_ = 0;

99	92

100 EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz)	93 EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz)

101 : fft_(),	94 : fft_(),

102 data_dumper_(	95 data_dumper_(

103 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),	96 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),

104 optimization_(DetectOptimization()),	97 optimization_(DetectOptimization()),

105 sample_rate_hz_(sample_rate_hz),	98 sample_rate_hz_(sample_rate_hz),

106 subtractor_(data_dumper_.get(), optimization_),	99 subtractor_(data_dumper_.get(), optimization_),

107 suppression_gain_(optimization_),	100 suppression_gain_(optimization_),

108 cng_(optimization_),	101 cng_(optimization_),

109 suppression_filter_(sample_rate_hz_),	102 suppression_filter_(sample_rate_hz_) {

110 X_buffer_(optimization_,

111 NumBandsForRate(sample_rate_hz_),

112 std::max(subtractor_.MinFarendBufferLength(),

113 power_echo_model_.MinFarendBufferLength()),

114 subtractor_.NumBlocksInRenderSums()) {

115 RTC_DCHECK(ValidFullBandRate(sample_rate_hz));	103 RTC_DCHECK(ValidFullBandRate(sample_rate_hz));

116 }	104 }

117	105

118 EchoRemoverImpl::~EchoRemoverImpl() = default;	106 EchoRemoverImpl::~EchoRemoverImpl() = default;

119	107

120 void EchoRemoverImpl::ProcessCapture(	108 void EchoRemoverImpl::ProcessCapture(

121 const rtc::Optional<size_t>& echo_path_delay_samples,	109 const rtc::Optional<size_t>& echo_path_delay_samples,

122 const EchoPathVariability& echo_path_variability,	110 const EchoPathVariability& echo_path_variability,

123 bool capture_signal_saturation,	111 bool capture_signal_saturation,

124 const RenderBuffer& render_buffer,	112 const RenderBuffer& render_buffer,

125 std::vector<std::vector<float>>* capture) {	113 std::vector<std::vector<float>>* capture) {

126 const std::vector<std::vector<float>>& x = render_buffer.MostRecentBlock();	114 const std::vector<std::vector<float>>& x = render_buffer.MostRecentBlock();

127 std::vector<std::vector<float>>* y = capture;	115 std::vector<std::vector<float>>* y = capture;

128	116

129 RTC_DCHECK(y);	117 RTC_DCHECK(y);

130 RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_));	118 RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_));

131 RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_));	119 RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_));

132 RTC_DCHECK_EQ(x[0].size(), kBlockSize);	120 RTC_DCHECK_EQ(x[0].size(), kBlockSize);

133 RTC_DCHECK_EQ((*y)[0].size(), kBlockSize);	121 RTC_DCHECK_EQ((*y)[0].size(), kBlockSize);

134 const std::vector<float>& x0 = x[0];	122 const std::vector<float>& x0 = x[0];

135 std::vector<float>& y0 = (*y)[0];	123 std::vector<float>& y0 = (*y)[0];

136	124

137 data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize, &y0[0],	125 data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0],

138 LowestBandRate(sample_rate_hz_), 1);	126 LowestBandRate(sample_rate_hz_), 1);

139 data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize, &x0[0],	127 data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],

140 LowestBandRate(sample_rate_hz_), 1);	128 LowestBandRate(sample_rate_hz_), 1);

141	129

142 aec_state_.UpdateCaptureSaturation(capture_signal_saturation);	130 aec_state_.UpdateCaptureSaturation(capture_signal_saturation);

143	131

144 if (echo_path_variability.AudioPathChanged()) {	132 if (echo_path_variability.AudioPathChanged()) {

145 subtractor_.HandleEchoPathChange(echo_path_variability);	133 subtractor_.HandleEchoPathChange(echo_path_variability);

146 residual_echo_estimator_.HandleEchoPathChange(echo_path_variability);	134 aec_state_.HandleEchoPathChange(echo_path_variability);

147 }	135 }

148	136

149 std::array<float, kFftLengthBy2Plus1> Y2;	137 std::array<float, kFftLengthBy2Plus1> Y2;

150 std::array<float, kFftLengthBy2Plus1> S2_power;

151 std::array<float, kFftLengthBy2Plus1> R2;	138 std::array<float, kFftLengthBy2Plus1> R2;

152 std::array<float, kFftLengthBy2Plus1> S2_linear;	139 std::array<float, kFftLengthBy2Plus1> S2_linear;

153 std::array<float, kFftLengthBy2Plus1> G;	140 std::array<float, kFftLengthBy2Plus1> G;

	141 float high_bands_gain;

154 FftData Y;	142 FftData Y;

155 FftData comfort_noise;	143 FftData comfort_noise;

156 FftData high_band_comfort_noise;	144 FftData high_band_comfort_noise;

157 SubtractorOutput subtractor_output;	145 SubtractorOutput subtractor_output;

158 FftData& E_main = subtractor_output.E_main;	146 FftData& E_main = subtractor_output.E_main;

159 auto& E2_main = subtractor_output.E2_main;	147 auto& E2_main = subtractor_output.E2_main;

160 auto& E2_shadow = subtractor_output.E2_shadow;	148 auto& E2_shadow = subtractor_output.E2_shadow;

161 auto& e_main = subtractor_output.e_main;	149 auto& e_main = subtractor_output.e_main;

162 auto& e_shadow = subtractor_output.e_shadow;

163	150

164 // Analyze the render signal.	151 // Analyze the render signal.

165 render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay());	152 render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay());

166	153

167 // Perform linear echo cancellation.	154 // Perform linear echo cancellation.

168 subtractor_.Process(render_buffer, y0, render_signal_analyzer_,	155 subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_,

169 aec_state_.SaturatedCapture(), &subtractor_output);	156 &subtractor_output);

170	157

171 // Compute spectra.	158 // Compute spectra.

172 fft_.ZeroPaddedFft(y0, &Y);	159 fft_.ZeroPaddedFft(y0, &Y);

173 LinearEchoPower(E_main, Y, &S2_linear);	160 LinearEchoPower(E_main, Y, &S2_linear);

174 Y.Spectrum(optimization_, &Y2);	161 Y.Spectrum(optimization_, &Y2);

175	162

176 // Update the AEC state information.	163 // Update the AEC state information.

177 aec_state_.Update(subtractor_.FilterFrequencyResponse(),	164 aec_state_.Update(subtractor_.FilterFrequencyResponse(),

178 echo_path_delay_samples, render_buffer, E2_main, E2_shadow,	165 echo_path_delay_samples, render_buffer, E2_main, Y2, x0,

179 Y2, x0, echo_path_variability, echo_leakage_detected_);	166 echo_leakage_detected_);

180

181 // Use the power model to estimate the echo.

182 // TODO(peah): Remove in upcoming CL.

183 // power_echo_model_.EstimateEcho(render_buffer, Y2, aec_state_, &S2_power);

184	167

185 // Choose the linear output.	168 // Choose the linear output.

186 output_selector_.FormLinearOutput(e_main, y0);	169 output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);

187 data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],	170 data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],

188 LowestBandRate(sample_rate_hz_), 1);	171 LowestBandRate(sample_rate_hz_), 1);

189 const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;	172 const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;

190	173

191 // Estimate the residual echo power.	174 // Estimate the residual echo power.

192 residual_echo_estimator_.Estimate(	175 residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(),

193 output_selector_.UseSubtractorOutput(), aec_state_, render_buffer,	176 aec_state_, render_buffer, S2_linear, Y2,

194 subtractor_.FilterFrequencyResponse(), E2_main, E2_shadow, S2_linear,	177 &R2);

195 S2_power, Y2, &R2);

196	178

197 // Estimate the comfort noise.	179 // Estimate the comfort noise.

198 cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);	180 cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);

199	181

200 // Detect basic doubletalk.

201 const bool doubletalk = BlockPower(e_shadow) < BlockPower(e_main);

202

203 // A choose and apply echo suppression gain.	182 // A choose and apply echo suppression gain.

204 suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),	183 suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),

205 doubletalk ? 0.001f : 0.0001f, &G);	184 aec_state_.SaturatedEcho(), x, y->size(),

206 suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, y);	185 &high_bands_gain, &G);

	186 suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,

	187 high_bands_gain, y);

207	188

208 // Update the metrics.	189 // Update the metrics.

209 metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);	190 metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);

210	191

211 // Debug outputs for the purpose of development and analysis.	192 // Debug outputs for the purpose of development and analysis.

212 data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());	193 data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());

213 data_dumper_->DumpRaw("aec3_suppressor_gain", G);	194 data_dumper_->DumpRaw("aec3_suppressor_gain", G);

214 data_dumper_->DumpWav("aec3_output",	195 data_dumper_->DumpWav("aec3_output",

215 rtc::ArrayView<const float>(&y0[0], kBlockSize),	196 rtc::ArrayView<const float>(&y0[0], kBlockSize),

216 LowestBandRate(sample_rate_hz_), 1);	197 LowestBandRate(sample_rate_hz_), 1);

217 data_dumper_->DumpRaw("aec3_using_subtractor_output",	198 data_dumper_->DumpRaw("aec3_using_subtractor_output",

218 output_selector_.UseSubtractorOutput() ? 1 : 0);	199 output_selector_.UseSubtractorOutput() ? 1 : 0);

219 data_dumper_->DumpRaw("aec3_doubletalk", doubletalk ? 1 : 0);

220 data_dumper_->DumpRaw("aec3_E2", E2);	200 data_dumper_->DumpRaw("aec3_E2", E2);

221 data_dumper_->DumpRaw("aec3_E2_main", E2_main);	201 data_dumper_->DumpRaw("aec3_E2_main", E2_main);

222 data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);	202 data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);

223 data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);	203 data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);

224 data_dumper_->DumpRaw("aec3_S2_power", S2_power);

225 data_dumper_->DumpRaw("aec3_Y2", Y2);	204 data_dumper_->DumpRaw("aec3_Y2", Y2);

	205 data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0));

226 data_dumper_->DumpRaw("aec3_R2", R2);	206 data_dumper_->DumpRaw("aec3_R2", R2);

227 data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle());	207 data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle());

228 data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());	208 data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());

229 data_dumper_->DumpRaw("aec3_reliable_filter_bands",

230 aec_state_.BandsWithReliableFilter());

231 data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender());	209 data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender());

232 data_dumper_->DumpRaw("aec3_model_based_aec_feasible",

233 aec_state_.ModelBasedAecFeasible());

234 data_dumper_->DumpRaw("aec3_usable_linear_estimate",	210 data_dumper_->DumpRaw("aec3_usable_linear_estimate",

235 aec_state_.UsableLinearEstimate());	211 aec_state_.UsableLinearEstimate());

236 data_dumper_->DumpRaw(	212 data_dumper_->DumpRaw(

237 "aec3_filter_delay",	213 "aec3_filter_delay",

238 aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1);	214 aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1);

239 data_dumper_->DumpRaw(	215 data_dumper_->DumpRaw(

240 "aec3_external_delay",	216 "aec3_external_delay",

241 aec_state_.ExternalDelay() ? *aec_state_.ExternalDelay() : -1);	217 aec_state_.ExternalDelay() ? *aec_state_.ExternalDelay() : -1);

242 data_dumper_->DumpRaw("aec3_capture_saturation",	218 data_dumper_->DumpRaw("aec3_capture_saturation",

243 aec_state_.SaturatedCapture() ? 1 : 0);	219 aec_state_.SaturatedCapture() ? 1 : 0);

244 }	220 }

245	221

246 } // namespace	222 } // namespace

247	223

248 EchoRemover* EchoRemover::Create(int sample_rate_hz) {	224 EchoRemover* EchoRemover::Create(int sample_rate_hz) {

249 return new EchoRemoverImpl(sample_rate_hz);	225 return new EchoRemoverImpl(sample_rate_hz);

250 }	226 }

251	227

252 } // namespace webrtc	228 } // namespace webrtc

OLD	NEW