webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 2035213002: Only update Intelligibility Enhancer gains every 10 chunks

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2035213002: Only update Intelligibility Enhancer gains every 10 chunks (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 22 matching lines...) Expand all Loading...
33 const float kLambdaTop = -1e-5f; // search for lambda.	33 const float kLambdaTop = -1e-5f; // search for lambda.

34 const float kVoiceProbabilityThreshold = 0.02f;	34 const float kVoiceProbabilityThreshold = 0.02f;

35 // Number of chunks after voice activity that are still considered speech.	35 // Number of chunks after voice activity that are still considered speech.

36 const size_t kSpeechOffsetDelay = 80;	36 const size_t kSpeechOffsetDelay = 80;

37 const float kDecayRate = 0.994f; // Power estimation decay rate.	37 const float kDecayRate = 0.994f; // Power estimation decay rate.

38 const float kMaxRelativeGainChange = 0.006f;	38 const float kMaxRelativeGainChange = 0.006f;

39 const float kRho = 0.0004f; // Default production and interpretation SNR.	39 const float kRho = 0.0004f; // Default production and interpretation SNR.

40 const float kPowerNormalizationFactor = 1.f / (1 << 30);	40 const float kPowerNormalizationFactor = 1.f / (1 << 30);

41 const float kMaxActiveSNR = 128.f; // 21dB	41 const float kMaxActiveSNR = 128.f; // 21dB

42 const float kMinInactiveSNR = 32.f; // 15dB	42 const float kMinInactiveSNR = 32.f; // 15dB

	43 const size_t kGainUpdatePeriod = 10u;

43	44

44 // Returns dot product of vectors |a| and |b| with size |length|.	45 // Returns dot product of vectors |a| and |b| with size |length|.

45 float DotProduct(const float* a, const float* b, size_t length) {	46 float DotProduct(const float* a, const float* b, size_t length) {

46 float ret = 0.f;	47 float ret = 0.f;

47 for (size_t i = 0; i < length; ++i) {	48 for (size_t i = 0; i < length; ++i) {

48 ret += a[i] * b[i];	49 ret += a[i] * b[i];

49 }	50 }

50 return ret;	51 return ret;

51 }	52 }

52	53

(...skipping 28 matching lines...) Expand all Loading...
81 center_freqs_(bank_size_),	82 center_freqs_(bank_size_),

82 capture_filter_bank_(CreateErbBank(num_noise_bins)),	83 capture_filter_bank_(CreateErbBank(num_noise_bins)),

83 render_filter_bank_(CreateErbBank(freqs_)),	84 render_filter_bank_(CreateErbBank(freqs_)),

84 gains_eq_(bank_size_),	85 gains_eq_(bank_size_),

85 gain_applier_(freqs_, kMaxRelativeGainChange),	86 gain_applier_(freqs_, kMaxRelativeGainChange),

86 audio_s16_(chunk_length_),	87 audio_s16_(chunk_length_),

87 chunks_since_voice_(kSpeechOffsetDelay),	88 chunks_since_voice_(kSpeechOffsetDelay),

88 is_speech_(false),	89 is_speech_(false),

89 snr_(kMaxActiveSNR),	90 snr_(kMaxActiveSNR),

90 is_active_(false),	91 is_active_(false),

	92 num_chunks_(0u),

91 noise_estimation_buffer_(num_noise_bins),	93 noise_estimation_buffer_(num_noise_bins),

92 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,	94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,

93 std::vector<float>(num_noise_bins),	95 std::vector<float>(num_noise_bins),

94 RenderQueueItemVerifier<float>(num_noise_bins)) {	96 RenderQueueItemVerifier<float>(num_noise_bins)) {

95 RTC_DCHECK_LE(kRho, 1.f);	97 RTC_DCHECK_LE(kRho, 1.f);

96	98

97 const size_t erb_index = static_cast<size_t>(	99 const size_t erb_index = static_cast<size_t>(

98 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +	100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +

99 43.f));	101 43.f));

100 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);	102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
137 const std::complex<float>* const* in_block,	139 const std::complex<float>* const* in_block,

138 size_t in_channels,	140 size_t in_channels,

139 size_t frames,	141 size_t frames,

140 size_t /* out_channels */,	142 size_t /* out_channels */,

141 std::complex<float>* const* out_block) {	143 std::complex<float>* const* out_block) {

142 RTC_DCHECK_EQ(freqs_, frames);	144 RTC_DCHECK_EQ(freqs_, frames);

143 if (is_speech_) {	145 if (is_speech_) {

144 clear_power_estimator_.Step(in_block[0]);	146 clear_power_estimator_.Step(in_block[0]);

145 }	147 }

146 SnrBasedEffectActivation();	148 SnrBasedEffectActivation();

147 if (is_active_) {	149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) {

148 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,	150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,

149 filtered_clear_pow_.data());	151 filtered_clear_pow_.data());

150 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,	152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,

151 filtered_noise_pow_.data());	153 filtered_noise_pow_.data());

152 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());	154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

153 const float power_target = std::accumulate(	155 const float power_target = std::accumulate(

154 filtered_clear_pow_.data(),	156 filtered_clear_pow_.data(),

155 filtered_clear_pow_.data() + bank_size_,	157 filtered_clear_pow_.data() + bank_size_,

156 0.f);	158 0.f);

157 const float power_top =	159 const float power_top =

(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
342 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

343 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

344 chunks_since_voice_ = 0;	346 chunks_since_voice_ = 0;

345 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

346 ++chunks_since_voice_;	348 ++chunks_since_voice_;

347 }	349 }

348 return chunks_since_voice_ < kSpeechOffsetDelay;	350 return chunks_since_voice_ < kSpeechOffsetDelay;

349 }	351 }

350	352

351 } // namespace webrtc	353 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »