webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1821443003: Fix normalization of noise estimate in NoiseSuppressor

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1821443003: Fix normalization of noise estimate in NoiseSuppressor (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Disable noise suppressor bit-exactness unittests Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 11 matching lines...) Expand all Loading...
22	22

23 namespace webrtc {	23 namespace webrtc {

24	24

25 namespace {	25 namespace {

26	26

27 const size_t kErbResolution = 2;	27 const size_t kErbResolution = 2;

28 const int kWindowSizeMs = 16;	28 const int kWindowSizeMs = 16;

29 const int kChunkSizeMs = 10; // Size provided by APM.	29 const int kChunkSizeMs = 10; // Size provided by APM.

30 const float kClipFreqKhz = 0.2f;	30 const float kClipFreqKhz = 0.2f;

31 const float kKbdAlpha = 1.5f;	31 const float kKbdAlpha = 1.5f;

32 const float kLambdaBot = -1.0f; // Extreme values in bisection	32 const float kLambdaBot = -1.f; // Extreme values in bisection

33 const float kLambdaTop = -1e-5f; // search for lambda.	33 const float kLambdaTop = -1e-5f; // search for lambda.

34 const float kVoiceProbabilityThreshold = 0.02f;	34 const float kVoiceProbabilityThreshold = 0.02f;

35 // Number of chunks after voice activity which is still considered speech.	35 // Number of chunks after voice activity which is still considered speech.

36 const size_t kSpeechOffsetDelay = 80;	36 const size_t kSpeechOffsetDelay = 80;

37 const float kDecayRate = 0.98f; // Power estimation decay rate.	37 const float kDecayRate = 0.98f; // Power estimation decay rate.

38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.	38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.

39 const float kRho = 0.0004f; // Default production and interpretation SNR.	39 const float kRho = 0.0004f; // Default production and interpretation SNR.

	40 const float kPowerNormalizationFactor = 1.f / (1 << 30);

40	41

41 // Returns dot product of vectors |a| and |b| with size |length|.	42 // Returns dot product of vectors |a| and |b| with size |length|.

42 float DotProduct(const float* a, const float* b, size_t length) {	43 float DotProduct(const float* a, const float* b, size_t length) {

43 float ret = 0.f;	44 float ret = 0.f;

44 for (size_t i = 0; i < length; ++i) {	45 for (size_t i = 0; i < length; ++i) {

45 ret += a[i] * b[i];	46 ret += a[i] * b[i];

46 }	47 }

47 return ret;	48 return ret;

48 }	49 }

49	50

50 // Computes the power across ERB bands from the power spectral density |pow|.	51 // Computes the power across ERB bands from the power spectral density |pow|.

51 // Stores it in |result|.	52 // Stores it in |result|.

52 void MapToErbBands(const float* pow,	53 void MapToErbBands(const float* pow,

53 const std::vector<std::vector<float>>& filter_bank,	54 const std::vector<std::vector<float>>& filter_bank,

54 float* result) {	55 float* result) {

55 for (size_t i = 0; i < filter_bank.size(); ++i) {	56 for (size_t i = 0; i < filter_bank.size(); ++i) {

56 RTC_DCHECK_GT(filter_bank[i].size(), 0u);	57 RTC_DCHECK_GT(filter_bank[i].size(), 0u);

57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());	58 result[i] = kPowerNormalizationFactor *

	59 DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());

58 }	60 }

59 }	61 }

60	62

61 } // namespace	63 } // namespace

62	64

63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,	65 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,

64 size_t num_render_channels,	66 size_t num_render_channels,

65 size_t num_noise_bins)	67 size_t num_noise_bins)

66 : freqs_(RealFourier::ComplexLength(	68 : freqs_(RealFourier::ComplexLength(

67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),	69 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),

(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
133 if (is_speech_) {	135 if (is_speech_) {

134 clear_power_estimator_.Step(in_block[0]);	136 clear_power_estimator_.Step(in_block[0]);

135 }	137 }

136 const std::vector<float>& clear_power = clear_power_estimator_.power();	138 const std::vector<float>& clear_power = clear_power_estimator_.power();

137 const std::vector<float>& noise_power = noise_power_estimator_.power();	139 const std::vector<float>& noise_power = noise_power_estimator_.power();

138 MapToErbBands(clear_power.data(), render_filter_bank_,	140 MapToErbBands(clear_power.data(), render_filter_bank_,

139 filtered_clear_pow_.data());	141 filtered_clear_pow_.data());

140 MapToErbBands(noise_power.data(), capture_filter_bank_,	142 MapToErbBands(noise_power.data(), capture_filter_bank_,

141 filtered_noise_pow_.data());	143 filtered_noise_pow_.data());

142 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());	144 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

143 const float power_target =	145 const float power_target = std::accumulate(

144 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f);	146 filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);

145 const float power_top =	147 const float power_top =

146 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	148 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

147 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());	149 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

148 const float power_bot =	150 const float power_bot =

149 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	151 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

150 if (power_target >= power_bot && power_target <= power_top) {	152 if (power_target >= power_bot && power_target <= power_top) {

151 SolveForLambda(power_target);	153 SolveForLambda(power_target);

152 UpdateErbGains();	154 UpdateErbGains();

153 } // Else experiencing power underflow, so do nothing.	155 } // Else experiencing power underflow, so do nothing.

154 for (size_t i = 0; i < in_channels; ++i) {	156 for (size_t i = 0; i < in_channels; ++i) {

(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	308 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	309 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

308 chunks_since_voice_ = 0;	310 chunks_since_voice_ = 0;

309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	311 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

310 ++chunks_since_voice_;	312 ++chunks_since_voice_;

311 }	313 }

312 return chunks_since_voice_ < kSpeechOffsetDelay;	314 return chunks_since_voice_ < kSpeechOffsetDelay;

313 }	315 }

314	316

315 } // namespace webrtc	317 } // namespace webrtc

OLD	NEW

« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc » ('j') | no next file with comments »