webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1848123002: Tweak kDecayRate in the IntelligibilityEnhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1848123002: Tweak kDecayRate in the IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 16 matching lines...) Expand all Loading...
27 const size_t kErbResolution = 2;	27 const size_t kErbResolution = 2;

28 const int kWindowSizeMs = 16;	28 const int kWindowSizeMs = 16;

29 const int kChunkSizeMs = 10; // Size provided by APM.	29 const int kChunkSizeMs = 10; // Size provided by APM.

30 const float kClipFreqKhz = 0.2f;	30 const float kClipFreqKhz = 0.2f;

31 const float kKbdAlpha = 1.5f;	31 const float kKbdAlpha = 1.5f;

32 const float kLambdaBot = -1.0f; // Extreme values in bisection	32 const float kLambdaBot = -1.0f; // Extreme values in bisection

33 const float kLambdaTop = -1e-5f; // search for lambda.	33 const float kLambdaTop = -1e-5f; // search for lambda.

34 const float kVoiceProbabilityThreshold = 0.02f;	34 const float kVoiceProbabilityThreshold = 0.02f;

35 // Number of chunks after voice activity which is still considered speech.	35 // Number of chunks after voice activity which is still considered speech.

36 const size_t kSpeechOffsetDelay = 80;	36 const size_t kSpeechOffsetDelay = 80;

37 const float kDecayRate = 0.98f; // Power estimation decay rate.	37 const float kDecayRate = 0.994f; // Power estimation decay rate.
	peah-webrtc 2016/04/01 04:43:50
	I cannot really say that I yet understand the underlying algorithm, but it seems a bit strange to lock (map) the relative_change_limit in GainApplier to the decay rate. I'm not saying it is wrong to do so, but they seem quite independent, and are used in different places. Furthermore, this is a change from how it was before.

	hlundin-webrtc 2016/04/01 08:36:38
	On 2016/04/01 04:43:50, peah-webrtc wrote:
	> I cannot really say that I yet understand the underlying algorithm but it seems
	> a bit strange to lock the map the relative_change_limit in GainApplier to the
	> decay rate.
	>
	> I'm not saying it is wrong to do so, but they seem quite independent, and are
	> used in different places. Furthermore, this is a change from how it was before.
	Acknowledged.

	aluebs-webrtc 2016/04/01 17:33:35
	On 2016/04/01 08:36:38, hlundin-webrtc wrote:
	> On 2016/04/01 04:43:50, peah-webrtc wrote:
	> > I cannot really say that I yet understand the underlying algorithm but it seems
	> > a bit strange to lock the map the relative_change_limit in GainApplier to the
	> > decay rate.
	> >
	> > I'm not saying it is wrong to do so, but they seem quite independent, and are
	> > used in different places. Furthermore, this is a change from how it was before.
	>
	> Acknowledged.
	Well, the decay rate of the PSD estimations doesn't necessarily have to be linked to the maximum gain change, but it kind of makes sense, since both are linked to the speed at which the algorithm can adapt. And I find it elegant to have only one constant to tweak the adaptation speed. But of course, I can go back to having two different constants if you think it adds value.
38 const float kMaxRelativeGainChange = 0.04f; // Maximum relative change in gain.

39 const float kRho = 0.0004f; // Default production and interpretation SNR.	38 const float kRho = 0.0004f; // Default production and interpretation SNR.

40	39

41 // Returns dot product of vectors |a| and |b| with size |length|.	40 // Returns dot product of vectors |a| and |b| with size |length|.

42 float DotProduct(const float* a, const float* b, size_t length) {	41 float DotProduct(const float* a, const float* b, size_t length) {

43 float ret = 0.f;	42 float ret = 0.f;

44 for (size_t i = 0; i < length; ++i) {	43 for (size_t i = 0; i < length; ++i) {

45 ret += a[i] * b[i];	44 ret += a[i] * b[i];

46 }	45 }

47 return ret;	46 return ret;

48 }	47 }

(...skipping 22 matching lines...) Expand all Loading...
71 sample_rate_hz_(sample_rate_hz),	70 sample_rate_hz_(sample_rate_hz),

72 num_render_channels_(num_render_channels),	71 num_render_channels_(num_render_channels),

73 clear_power_estimator_(freqs_, kDecayRate),	72 clear_power_estimator_(freqs_, kDecayRate),

74 noise_power_estimator_(num_noise_bins, kDecayRate),	73 noise_power_estimator_(num_noise_bins, kDecayRate),

75 filtered_clear_pow_(bank_size_, 0.f),	74 filtered_clear_pow_(bank_size_, 0.f),

76 filtered_noise_pow_(num_noise_bins, 0.f),	75 filtered_noise_pow_(num_noise_bins, 0.f),

77 center_freqs_(bank_size_),	76 center_freqs_(bank_size_),

78 capture_filter_bank_(CreateErbBank(num_noise_bins)),	77 capture_filter_bank_(CreateErbBank(num_noise_bins)),

79 render_filter_bank_(CreateErbBank(freqs_)),	78 render_filter_bank_(CreateErbBank(freqs_)),

80 gains_eq_(bank_size_),	79 gains_eq_(bank_size_),

81 gain_applier_(freqs_, kMaxRelativeGainChange),	80 gain_applier_(freqs_, 1.f - kDecayRate),

82 audio_s16_(chunk_length_),	81 audio_s16_(chunk_length_),

83 chunks_since_voice_(kSpeechOffsetDelay),	82 chunks_since_voice_(kSpeechOffsetDelay),

84 is_speech_(false),	83 is_speech_(false),

85 noise_estimation_buffer_(num_noise_bins),	84 noise_estimation_buffer_(num_noise_bins),

86 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,	85 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,

87 std::vector<float>(num_noise_bins),	86 std::vector<float>(num_noise_bins),

88 RenderQueueItemVerifier<float>(num_noise_bins)) {	87 RenderQueueItemVerifier<float>(num_noise_bins)) {

89 RTC_DCHECK_LE(kRho, 1.f);	88 RTC_DCHECK_LE(kRho, 1.f);

90	89

91 const size_t erb_index = static_cast<size_t>(	90 const size_t erb_index = static_cast<size_t>(

(...skipping 214 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
306 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	305 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

307 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	306 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

308 chunks_since_voice_ = 0;	307 chunks_since_voice_ = 0;

309 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	308 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

310 ++chunks_since_voice_;	309 ++chunks_since_voice_;

311 }	310 }

312 return chunks_since_voice_ < kSpeechOffsetDelay;	311 return chunks_since_voice_ < kSpeechOffsetDelay;

313 }	312 }

314	313

315 } // namespace webrtc	314 } // namespace webrtc

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »