webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc - Issue 2110593003: Pull out the PostFilter to its own NonlinearBeamformer API

Side by Side Diff: webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc

Issue 2110593003: Pull out the PostFilter to its own NonlinearBeamformer API (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Keep old interfaces to not break dependencies Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('K') | « webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h ('k') | webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 104 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
115 }	115 }

116	116

117 return result;	117 return result;

118 }	118 }

119	119

120 // Works for positive numbers only.	120 // Works for positive numbers only.

121 size_t Round(float x) {	121 size_t Round(float x) {

122 return static_cast<size_t>(std::floor(x + 0.5f));	122 return static_cast<size_t>(std::floor(x + 0.5f));

123 }	123 }

124	124

125 // Calculates the sum of absolute values of a complex matrix.

126 float SumAbs(const ComplexMatrix<float>& mat) {

127 float sum_abs = 0.f;

128 const complex<float>* const* mat_els = mat.elements();

129 for (size_t i = 0; i < mat.num_rows(); ++i) {

130 for (size_t j = 0; j < mat.num_columns(); ++j) {

131 sum_abs += std::abs(mat_els[i][j]);

132 }

133 }

134 return sum_abs;

135 }

136

137 // Calculates the sum of squares of a complex matrix.	125 // Calculates the sum of squares of a complex matrix.

138 float SumSquares(const ComplexMatrix<float>& mat) {	126 float SumSquares(const ComplexMatrix<float>& mat) {

139 float sum_squares = 0.f;	127 float sum_squares = 0.f;

140 const complex<float>* const* mat_els = mat.elements();	128 const complex<float>* const* mat_els = mat.elements();

141 for (size_t i = 0; i < mat.num_rows(); ++i) {	129 for (size_t i = 0; i < mat.num_rows(); ++i) {

142 for (size_t j = 0; j < mat.num_columns(); ++j) {	130 for (size_t j = 0; j < mat.num_columns(); ++j) {

143 float abs_value = std::abs(mat_els[i][j]);	131 float abs_value = std::abs(mat_els[i][j]);

144 sum_squares += abs_value * abs_value;	132 sum_squares += abs_value * abs_value;

145 }	133 }

146 }	134 }

(...skipping 29 matching lines...) Expand all Loading...
176 return array_geometry;	164 return array_geometry;

177 }	165 }

178	166

179 } // namespace	167 } // namespace

180	168

181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);	169 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);

182	170

183 // static	171 // static

184 const size_t NonlinearBeamformer::kNumFreqBins;	172 const size_t NonlinearBeamformer::kNumFreqBins;

185	173

	174 PostFilterTransform::PostFilterTransform(size_t num_channels,

	175 size_t chunk_length,

	176 float* window,

	177 size_t fft_size)

	178 : transform_(num_channels,

	179 num_channels,

	180 chunk_length,

	181 window,

	182 fft_size,

	183 fft_size / 2,

	184 this),

	185 num_freq_bins_(fft_size / 2 + 1) {}

	186

	187 void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {

	188 final_mask_ = final_mask;

	189 transform_.ProcessChunk(data, data);

	190 }

	191

	192 void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,

	193 size_t num_input_channels,

	194 size_t num_freq_bins,

	195 size_t num_output_channels,

	196 complex<float>* const* output) {

	197 RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins);

	198 RTC_DCHECK_EQ(num_input_channels, num_output_channels);

	199

	200 for (size_t ch = 0; ch < num_input_channels; ++ch) {

	201 for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {

	202 output[ch][f_ix] =

	203 kCompensationGain * final_mask_[f_ix] * input[ch][f_ix];

	204 }

	205 }

	206 }

	207

186 NonlinearBeamformer::NonlinearBeamformer(	208 NonlinearBeamformer::NonlinearBeamformer(

187 const std::vector<Point>& array_geometry,	209 const std::vector<Point>& array_geometry,

	210 size_t num_postfilter_channels,

188 SphericalPointf target_direction)	211 SphericalPointf target_direction)

189 : num_input_channels_(array_geometry.size()),	212 : num_input_channels_(array_geometry.size()),

	213 num_postfilter_channels_(num_postfilter_channels),

190 array_geometry_(GetCenteredArray(array_geometry)),	214 array_geometry_(GetCenteredArray(array_geometry)),

191 array_normal_(GetArrayNormalIfExists(array_geometry)),	215 array_normal_(GetArrayNormalIfExists(array_geometry)),

192 min_mic_spacing_(GetMinimumSpacing(array_geometry)),	216 min_mic_spacing_(GetMinimumSpacing(array_geometry)),

193 target_angle_radians_(target_direction.azimuth()),	217 target_angle_radians_(target_direction.azimuth()),

194 away_radians_(std::min(	218 away_radians_(std::min(

195 static_cast<float>(M_PI),	219 static_cast<float>(M_PI),

196 std::max(kMinAwayRadians,	220 std::max(kMinAwayRadians,

197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {	221 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {

198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);	222 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);

199 }	223 }

200	224

201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {	225 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {

202 chunk_length_ =	226 chunk_length_ =

203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));	227 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));

204 sample_rate_hz_ = sample_rate_hz;	228 sample_rate_hz_ = sample_rate_hz;

205	229

206 high_pass_postfilter_mask_ = 1.f;	230 high_pass_postfilter_mask_ = 1.f;

207 is_target_present_ = false;	231 is_target_present_ = false;

208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;	232 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;

209 interference_blocks_count_ = hold_target_blocks_;	233 interference_blocks_count_ = hold_target_blocks_;

210	234

211 lapped_transform_.reset(new LappedTransform(num_input_channels_,	235 process_transform_.reset(new LappedTransform(num_input_channels_,

212 1,	236 0u,

213 chunk_length_,	237 chunk_length_,

214 window_,	238 window_,

215 kFftSize,	239 kFftSize,

216 kFftSize / 2,	240 kFftSize / 2,

217 this));	241 this));

	242 postfilter_transform_.reset(new PostFilterTransform(

	243 num_postfilter_channels_, chunk_length_, window_, kFftSize));

	244 const float wave_number_step =

	245 (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);

218 for (size_t i = 0; i < kNumFreqBins; ++i) {	246 for (size_t i = 0; i < kNumFreqBins; ++i) {

219 time_smooth_mask_[i] = 1.f;	247 time_smooth_mask_[i] = 1.f;

220 final_mask_[i] = 1.f;	248 final_mask_[i] = 1.f;

221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;	249 wave_numbers_[i] = i * wave_number_step;

222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;

223 }	250 }

224	251

225 InitLowFrequencyCorrectionRanges();	252 InitLowFrequencyCorrectionRanges();

226 InitDiffuseCovMats();	253 InitDiffuseCovMats();

227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));	254 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));

228 }	255 }

229	256

230 // These bin indexes determine the regions over which a mean is taken. This is	257 // These bin indexes determine the regions over which a mean is taken. This is

231 // applied as a constant value over the adjacent end "frequency correction"	258 // applied as a constant value over the adjacent end "frequency correction"

232 // regions.	259 // regions.

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
299 void NonlinearBeamformer::InitDelaySumMasks() {	326 void NonlinearBeamformer::InitDelaySumMasks() {

300 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {	327 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

301 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);	328 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);

302 CovarianceMatrixGenerator::PhaseAlignmentMasks(	329 CovarianceMatrixGenerator::PhaseAlignmentMasks(

303 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,	330 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,

304 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);	331 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);

305	332

306 complex_f norm_factor = sqrt(	333 complex_f norm_factor = sqrt(

307 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));	334 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));

308 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);	335 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);

309 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);

310 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(

311 normalized_delay_sum_masks_[f_ix]));

312 }	336 }

313 }	337 }

314	338

315 void NonlinearBeamformer::InitTargetCovMats() {	339 void NonlinearBeamformer::InitTargetCovMats() {

316 for (size_t i = 0; i < kNumFreqBins; ++i) {	340 for (size_t i = 0; i < kNumFreqBins; ++i) {

317 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);	341 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);

318 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);	342 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);

319 }	343 }

320 }	344 }

321	345

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
359 void NonlinearBeamformer::NormalizeCovMats() {	383 void NonlinearBeamformer::NormalizeCovMats() {

360 for (size_t i = 0; i < kNumFreqBins; ++i) {	384 for (size_t i = 0; i < kNumFreqBins; ++i) {

361 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);	385 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);

362 rpsiws_[i].clear();	386 rpsiws_[i].clear();

363 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {	387 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {

364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));	388 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));

365 }	389 }

366 }	390 }

367 }	391 }

368	392

	393 void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) {

	394 RTC_DCHECK_EQ(data.num_channels(), num_input_channels_);

	395 RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_);

	396

	397 old_high_pass_mask_ = high_pass_postfilter_mask_;

	398 process_transform_->ProcessChunk(data.channels(0), nullptr);

	399 }

	400

	401 void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {

	402 RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);

	403 // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated.

	404 RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_);

	405

	406 postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);

	407

	408 // Ramp up/down for smoothing is needed in order to avoid discontinuities in

	409 // the transitions between 10 ms frames.

	410 const float ramp_increment =

	411 (high_pass_postfilter_mask_ - old_high_pass_mask_) /

	412 data->num_frames_per_band();

	413 for (size_t i = 1; i < data->num_bands(); ++i) {

	414 float smoothed_mask = old_high_pass_mask_;

	415 for (size_t j = 0; j < data->num_frames_per_band(); ++j) {

	416 smoothed_mask += ramp_increment;

	417 for (size_t k = 0; k < num_postfilter_channels_; ++k) {

	418 data->channels(i)[k][j] *= smoothed_mask;

	419 }

	420 }

	421 }

	422 }

	423

369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,	424 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,

370 ChannelBuffer<float>* output) {	425 ChannelBuffer<float>* output) {

371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);	426 AnalyzeChunk(input);

372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);	427 for (size_t i = 0u; i < input.num_bands(); ++i) {
	peah-webrtc 2016/06/29 08:38:20 Would it be possible to handle the output number o Would it be possible to handle the output number of channels in a less error-prone way. As it is now, there is an unnecessary dependency between how the user of the method handles the output. Would it be possible to either: 1) DCHECK on that the number of channels in output is 1? 2) Set the number of channels in output to 1. 3) Change the memcpy so that the input is downmixed/upmixed to the number of channels in output. aluebs-webrtc 2016/06/29 23:19:55 To be compatible with how the beamformer was used Show quoted text On 2016/06/29 08:38:20, peah-webrtc wrote: > Would it be possible to handle the output number of channels in a less > error-prone way. > As it is now, there is an unnecessary dependency between how the user of the > method handles the output. > > Would it be possible to either: > 1) DCHECK on that the number of channels in output is 1? > 2) Set the number of channels in output to 1. > 3) Change the memcpy so that the input is downmixed/upmixed to the number of > channels in output. > To be compatible with how the beamformer was used before this CL (the whole point of re-adding this interface) we: 1) can't enforce that the number of channels of the output is 1, since it could be the same channel_buffer than the input. 2) can't change the number of channels of the output, since that wasn't how it behaved and again it could be the same channel_buffer than the input. On the old bf interface, the behavior was exactly like this and copying the number of channels of the output (which can be the same as the input) will add unecessary complexity. But I guess it ensures it can be used with a different number of output channels (not supported in the old interface), so I did that. peah-webrtc 2016/06/30 05:25:05 I know this is how it is used to be, but it is qui Show quoted text On 2016/06/29 23:19:55, aluebs-webrtc wrote: > On 2016/06/29 08:38:20, peah-webrtc wrote: > > Would it be possible to handle the output number of channels in a less > > error-prone way. > > As it is now, there is an unnecessary dependency between how the user of the > > method handles the output. > > > > Would it be possible to either: > > 1) DCHECK on that the number of channels in output is 1? > > 2) Set the number of channels in output to 1. > > 3) Change the memcpy so that the input is downmixed/upmixed to the number of > > channels in output. > > > > To be compatible with how the beamformer was used before this CL (the whole > point of re-adding this interface) we: > 1) can't enforce that the number of channels of the output is 1, since it could > be the same channel_buffer than the input. > 2) can't change the number of channels of the output, since that wasn't how it > behaved and again it could be the same channel_buffer than the input. > > On the old bf interface, the behavior was exactly like this and copying the > number of channels of the output (which can be the same as the input) will add > unecessary complexity. But I guess it ensures it can be used with a different > number of output channels (not supported in the old interface), so I did that. I know this is how it is used to be, but it is quite error-prone and that was why I wanted to see whether it could be changed. By only copying the left channel to the output, you are in fact enforcing the number of channels in the output to 1, as any other channels than the left one will not be updated. Refraining to set the number of channels to 1 in this part of the code does not change that. I don't really see why it would be a problem to change the output number of channels when the input and the output refer to the same ChannelBuffer (via an in-place call). The memcopy operation is valid regardless of that, and the const-ness of input does not impose any restrictions on output just because they happen to point to the same ChannelBuffer. To me, since you anyway here copied only the left channel, you might as well also set the number of channels to 1 as that is what you really do when you only copy the left channel but I'm probably missing something. aluebs-webrtc 2016/06/30 23:37:09 This is a temporary interface to be able to migrat Show quoted text On 2016/06/30 05:25:05, peah-webrtc wrote: > On 2016/06/29 23:19:55, aluebs-webrtc wrote: > > On 2016/06/29 08:38:20, peah-webrtc wrote: > > > Would it be possible to handle the output number of channels in a less > > > error-prone way. > > > As it is now, there is an unnecessary dependency between how the user of the > > > method handles the output. > > > > > > Would it be possible to either: > > > 1) DCHECK on that the number of channels in output is 1? > > > 2) Set the number of channels in output to 1. > > > 3) Change the memcpy so that the input is downmixed/upmixed to the number of > > > channels in output. > > > > > > > To be compatible with how the beamformer was used before this CL (the whole > > point of re-adding this interface) we: > > 1) can't enforce that the number of channels of the output is 1, since it > could > > be the same channel_buffer than the input. > > 2) can't change the number of channels of the output, since that wasn't how it > > behaved and again it could be the same channel_buffer than the input. > > > > On the old bf interface, the behavior was exactly like this and copying the > > number of channels of the output (which can be the same as the input) will add > > unecessary complexity. But I guess it ensures it can be used with a different > > number of output channels (not supported in the old interface), so I did that. > > I know this is how it is used to be, but it is quite error-prone and that was > why I wanted to see whether it could be changed. > > By only copying the left channel to the output, you are in fact enforcing the > number of channels in the output to 1, as any other channels than the left one > will not be updated. Refraining to set the number of channels to 1 in this part > of the code does not change that. > > I don't really see why it would be a problem to change the output number of > channels when the input and the output refer to the same ChannelBuffer (via an > in-place call). The memcopy operation is valid regardless of that, and the > const-ness of input does not impose any restrictions on output just because they > happen to point to the same ChannelBuffer. > > To me, since you anyway here copied only the left channel, you might as well > also set the number of channels to 1 as that is what you really do when you only > copy the left channel but I'm probably missing something. > This is a temporary interface to be able to migrate the dependencies without breaking them and will be removed as soon as we migrate the dependencies. We shouldn't change the behavior, since that could break the dependencies that were not expecting the BF to change the channels of its ChannelBuffer.
373	428 std::memcpy(output->channels(i)[0], input.channels(i)[0],

374 float old_high_pass_mask = high_pass_postfilter_mask_;	429 sizeof(input.channels(0)[0][0]) * input.num_frames_per_band());

375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));

376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible

377 // discontinuities.

378 const float ramp_increment =

379 (high_pass_postfilter_mask_ - old_high_pass_mask) /

380 input.num_frames_per_band();

381 // Apply the smoothed high-pass mask to the first channel of each band.

382 // This can be done because the effect of the linear beamformer is negligible

383 // compared to the post-filter.

384 for (size_t i = 1; i < input.num_bands(); ++i) {

385 float smoothed_mask = old_high_pass_mask;

386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {

387 smoothed_mask += ramp_increment;

388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;

389 }

390 }	430 }

	431 PostFilter(output);

391 }	432 }

392	433

393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {	434 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {

394 target_angle_radians_ = target_direction.azimuth();	435 target_angle_radians_ = target_direction.azimuth();

395 InitHighFrequencyCorrectionRanges();	436 InitHighFrequencyCorrectionRanges();

396 InitInterfAngles();	437 InitInterfAngles();

397 InitDelaySumMasks();	438 InitDelaySumMasks();

398 InitTargetCovMats();	439 InitTargetCovMats();

399 InitInterfCovMats();	440 InitInterfCovMats();

400 NormalizeCovMats();	441 NormalizeCovMats();

401 }	442 }

402	443

403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {	444 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {

404 // If more than half-beamwidth degrees away from the beam's center,	445 // If more than half-beamwidth degrees away from the beam's center,

405 // you are out of the beam.	446 // you are out of the beam.

406 return fabs(spherical_point.azimuth() - target_angle_radians_) <	447 return fabs(spherical_point.azimuth() - target_angle_radians_) <

407 kHalfBeamWidthRadians;	448 kHalfBeamWidthRadians;

408 }	449 }

409	450

410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,	451 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,

411 size_t num_input_channels,	452 size_t num_input_channels,

412 size_t num_freq_bins,	453 size_t num_freq_bins,

413 size_t num_output_channels,	454 size_t num_output_channels,

414 complex_f* const* output) {	455 complex_f* const* output) {

415 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);	456 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);

416 RTC_CHECK_EQ(num_input_channels_, num_input_channels);	457 RTC_CHECK_EQ(num_input_channels_, num_input_channels);

417 RTC_CHECK_EQ(1u, num_output_channels);	458 RTC_CHECK_EQ(0u, num_output_channels);

418	459

419 // Calculating the post-filter masks. Note that we need two for each	460 // Calculating the post-filter masks. Note that we need two for each

420 // frequency bin to account for the positive and negative interferer	461 // frequency bin to account for the positive and negative interferer

421 // angle.	462 // angle.

422 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {	463 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {

423 eig_m_.CopyFromColumn(input, i, num_input_channels_);	464 eig_m_.CopyFromColumn(input, i, num_input_channels_);

424 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));	465 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));

425 if (eig_m_norm_factor != 0.f) {	466 if (eig_m_norm_factor != 0.f) {

426 eig_m_.Scale(1.f / eig_m_norm_factor);	467 eig_m_.Scale(1.f / eig_m_norm_factor);

427 }	468 }

(...skipping 21 matching lines...) Expand all Loading...
449 new_mask_[i] = tmp_mask;	490 new_mask_[i] = tmp_mask;

450 }	491 }

451 }	492 }

452 }	493 }

453	494

454 ApplyMaskTimeSmoothing();	495 ApplyMaskTimeSmoothing();

455 EstimateTargetPresence();	496 EstimateTargetPresence();

456 ApplyLowFrequencyCorrection();	497 ApplyLowFrequencyCorrection();

457 ApplyHighFrequencyCorrection();	498 ApplyHighFrequencyCorrection();

458 ApplyMaskFrequencySmoothing();	499 ApplyMaskFrequencySmoothing();

459 ApplyMasks(input, output);

460 }	500 }

461	501

462 float NonlinearBeamformer::CalculatePostfilterMask(	502 float NonlinearBeamformer::CalculatePostfilterMask(

463 const ComplexMatrixF& interf_cov_mat,	503 const ComplexMatrixF& interf_cov_mat,

464 float rpsiw,	504 float rpsiw,

465 float ratio_rxiw_rxim,	505 float ratio_rxiw_rxim,

466 float rmw_r) {	506 float rmw_r) {

467 float rpsim = Norm(interf_cov_mat, eig_m_);	507 float rpsim = Norm(interf_cov_mat, eig_m_);

468	508

469 float ratio = 0.f;	509 float ratio = 0.f;

470 if (rpsim > 0.f) {	510 if (rpsim > 0.f) {

471 ratio = rpsiw / rpsim;	511 ratio = rpsiw / rpsim;

472 }	512 }

473	513

474 float numerator = 1.f - kCutOffConstant;	514 float numerator = 1.f - kCutOffConstant;

475 if (rmw_r > 0.f) {	515 if (rmw_r > 0.f) {

476 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);	516 numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r);

477 }	517 }

478	518

479 float denominator = 1.f - kCutOffConstant;	519 float denominator = 1.f - kCutOffConstant;

480 if (ratio_rxiw_rxim > 0.f) {	520 if (ratio_rxiw_rxim > 0.f) {

481 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);	521 denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim);

482 }	522 }

483	523

484 return numerator / denominator;	524 return numerator / denominator;

485 }	525 }

486	526

487 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,

488 complex_f* const* output) {

489 complex_f* output_channel = output[0];

490 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {

491 output_channel[f_ix] = complex_f(0.f, 0.f);

492

493 const complex_f* delay_sum_mask_els =

494 normalized_delay_sum_masks_[f_ix].elements()[0];

495 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {

496 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];

497 }

498

499 output_channel[f_ix] = kCompensationGain final_mask_[f_ix];

500 }

501 }

502

503 // Smooth new_mask_ into time_smooth_mask_.	527 // Smooth new_mask_ into time_smooth_mask_.

504 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {	528 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {

505 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {	529 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {

506 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +	530 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +

507 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];	531 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];

508 }	532 }

509 }	533 }

510	534

511 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.	535 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.

512 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {	536 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
570 new_mask_ + high_mean_end_bin_ + 1);	594 new_mask_ + high_mean_end_bin_ + 1);

571 if (new_mask_[quantile] > kMaskTargetThreshold) {	595 if (new_mask_[quantile] > kMaskTargetThreshold) {

572 is_target_present_ = true;	596 is_target_present_ = true;

573 interference_blocks_count_ = 0;	597 interference_blocks_count_ = 0;

574 } else {	598 } else {

575 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;	599 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;

576 }	600 }

577 }	601 }

578	602

579 } // namespace webrtc	603 } // namespace webrtc

OLD	NEW