webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 2104273002: Add logging to Intelligibility Enhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2104273002: Add logging to Intelligibility Enhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Log active percentage Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

12	12

13 #include <math.h>	13 #include <math.h>

14 #include <stdlib.h>	14 #include <stdlib.h>

15 #include <algorithm>	15 #include <algorithm>

16 #include <limits>	16 #include <limits>

17 #include <numeric>	17 #include <numeric>

18	18

19 #include "webrtc/base/checks.h"	19 #include "webrtc/base/checks.h"

	20 #include "webrtc/base/logging.h"

20 #include "webrtc/common_audio/include/audio_util.h"	21 #include "webrtc/common_audio/include/audio_util.h"

21 #include "webrtc/common_audio/window_generator.h"	22 #include "webrtc/common_audio/window_generator.h"

22	23

23 namespace webrtc {	24 namespace webrtc {

24	25

25 namespace {	26 namespace {

26	27

27 const size_t kErbResolution = 2;	28 const size_t kErbResolution = 2;

28 const int kWindowSizeMs = 16;	29 const int kWindowSizeMs = 16;

29 const int kChunkSizeMs = 10; // Size provided by APM.	30 const int kChunkSizeMs = 10; // Size provided by APM.

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
83 capture_filter_bank_(CreateErbBank(num_noise_bins)),	84 capture_filter_bank_(CreateErbBank(num_noise_bins)),

84 render_filter_bank_(CreateErbBank(freqs_)),	85 render_filter_bank_(CreateErbBank(freqs_)),

85 gains_eq_(bank_size_),	86 gains_eq_(bank_size_),

86 gain_applier_(freqs_, kMaxRelativeGainChange),	87 gain_applier_(freqs_, kMaxRelativeGainChange),

87 audio_s16_(chunk_length_),	88 audio_s16_(chunk_length_),

88 chunks_since_voice_(kSpeechOffsetDelay),	89 chunks_since_voice_(kSpeechOffsetDelay),

89 is_speech_(false),	90 is_speech_(false),

90 snr_(kMaxActiveSNR),	91 snr_(kMaxActiveSNR),

91 is_active_(false),	92 is_active_(false),

92 num_chunks_(0u),	93 num_chunks_(0u),

	94 num_active_chunks_(0u),

93 noise_estimation_buffer_(num_noise_bins),	95 noise_estimation_buffer_(num_noise_bins),

94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,	96 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,

95 std::vector<float>(num_noise_bins),	97 std::vector<float>(num_noise_bins),

96 RenderQueueItemVerifier<float>(num_noise_bins)) {	98 RenderQueueItemVerifier<float>(num_noise_bins)) {

97 RTC_DCHECK_LE(kRho, 1.f);	99 RTC_DCHECK_LE(kRho, 1.f);

98	100

99 const size_t erb_index = static_cast<size_t>(	101 const size_t erb_index = static_cast<size_t>(

100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +	102 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +

101 43.f));	103 43.f));

102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);	104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

103	105

104 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);	106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);

105 std::vector<float> kbd_window(window_size);	107 std::vector<float> kbd_window(window_size);

106 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,	108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,

107 kbd_window.data());	109 kbd_window.data());

108 render_mangler_.reset(new LappedTransform(	110 render_mangler_.reset(new LappedTransform(

109 num_render_channels_, num_render_channels_, chunk_length_,	111 num_render_channels_, num_render_channels_, chunk_length_,

110 kbd_window.data(), window_size, window_size / 2, this));	112 kbd_window.data(), window_size, window_size / 2, this));

111 }	113 }

112	114

	115 IntelligibilityEnhancer::~IntelligibilityEnhancer() {

	116 LOG(LS_INFO) << "Intelligibility Enhancer was active for "
	hlundin-webrtc 2016/06/30 08:59:36
	The dtor is not always executed at the end of a call. At least in Chrome, a fast shutdown is performed if the user just closes the tab. +Ivo will know more about this. But in essence, beware that you might not get the logging you expect.

	ivoc 2016/06/30 09:13:48
	On 2016/06/30 08:59:36, hlundin-webrtc wrote:
	> The dtor is not always executed at the end of a call. At least in Chrome, a fast
	> shutdown is performed if the user just closes the tab. +Ivo will know more about
	> this. But in essence, beware that you might not get the logging you expect.
	Indeed, don't rely on destructors being called in Chrome. It may make more sense to log this periodically instead.

	aluebs-webrtc 2016/06/30 23:21:54
	On 2016/06/30 09:13:48, ivoc wrote:
	> On 2016/06/30 08:59:36, hlundin-webrtc wrote:
	> > The dtor is not always executed at the end of a call. At least in Chrome, a fast
	> > shutdown is performed if the user just closes the tab. +Ivo will know more about
	> > this. But in essence, beware that you might not get the logging you expect.
	>
	> Indeed, don't rely on destructors being called in Chrome. It may make more sense
	> to log this periodically instead.
	Thank you for pointing this out. I have been testing this a bit and it seems it runs the destructor if the user hangs up, but it doesn't if they directly kill the app from the app-switcher. I will leave this here, since I think it adds value, but also added the num_chunks_ when the IE is activated and deactivated so this percentage can be calculated from there if the destructor was not run.
	117 << static_cast<float>(num_active_chunks_) / num_chunks_

	118 << "%% of the call.";

	119 }

	120

113 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(	121 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(

114 std::vector<float> noise, int gain_db) {	122 std::vector<float> noise, int gain_db) {

115 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);	123 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);

116 const float gain = std::pow(10.f, gain_db / 20.f);	124 const float gain = std::pow(10.f, gain_db / 20.f);

117 for (auto& bin : noise) {	125 for (auto& bin : noise) {

118 bin *= gain;	126 bin *= gain;

119 }	127 }

120 // Disregarding return value since buffer overflow is acceptable, because it	128 // Disregarding return value since buffer overflow is acceptable, because it

121 // is not critical to get each noise estimate.	129 // is not critical to get each noise estimate.

122 if (noise_estimation_queue_.Insert(&noise)) {	130 if (noise_estimation_queue_.Insert(&noise)) {

(...skipping 16 matching lines...) Expand all Loading...
139 const std::complex<float>* const* in_block,	147 const std::complex<float>* const* in_block,

140 size_t in_channels,	148 size_t in_channels,

141 size_t frames,	149 size_t frames,

142 size_t /* out_channels */,	150 size_t /* out_channels */,

143 std::complex<float>* const* out_block) {	151 std::complex<float>* const* out_block) {

144 RTC_DCHECK_EQ(freqs_, frames);	152 RTC_DCHECK_EQ(freqs_, frames);

145 if (is_speech_) {	153 if (is_speech_) {

146 clear_power_estimator_.Step(in_block[0]);	154 clear_power_estimator_.Step(in_block[0]);

147 }	155 }

148 SnrBasedEffectActivation();	156 SnrBasedEffectActivation();

149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) {	157 if (is_active_) {

150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,	158 ++num_active_chunks_;

151 filtered_clear_pow_.data());	159 if (num_chunks_++ % kGainUpdatePeriod == 0) {

152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,	160 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,

153 filtered_noise_pow_.data());	161 filtered_clear_pow_.data());

154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());	162 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,

155 const float power_target = std::accumulate(	163 filtered_noise_pow_.data());

156 filtered_clear_pow_.data(),	164 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

157 filtered_clear_pow_.data() + bank_size_,	165 const float power_target = std::accumulate(

158 0.f);	166 filtered_clear_pow_.data(),

159 const float power_top =	167 filtered_clear_pow_.data() + bank_size_,

160 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	168 0.f);

161 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());	169 const float power_top =

162 const float power_bot =	170 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

163 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	171 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

164 if (power_target >= power_bot && power_target <= power_top) {	172 const float power_bot =

165 SolveForLambda(power_target);	173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

166 UpdateErbGains();	174 if (power_target >= power_bot && power_target <= power_top) {

167 } // Else experiencing power underflow, so do nothing.	175 SolveForLambda(power_target);

	176 UpdateErbGains();

	177 } // Else experiencing power underflow, so do nothing.

	178 }

168 }	179 }

169 for (size_t i = 0; i < in_channels; ++i) {	180 for (size_t i = 0; i < in_channels; ++i) {

170 gain_applier_.Apply(in_block[i], out_block[i]);	181 gain_applier_.Apply(in_block[i], out_block[i]);

171 }	182 }

172 }	183 }

173	184

174 void IntelligibilityEnhancer::SnrBasedEffectActivation() {	185 void IntelligibilityEnhancer::SnrBasedEffectActivation() {

175 const float* clear_psd = clear_power_estimator_.power().data();	186 const float* clear_psd = clear_power_estimator_.power().data();

176 const float* noise_psd = noise_power_estimator_.power().data();	187 const float* noise_psd = noise_power_estimator_.power().data();

177 const float clear_power =	188 const float clear_power =

178 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);	189 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);

179 const float noise_power =	190 const float noise_power =

180 std::accumulate(noise_psd, noise_psd + freqs_, 0.f);	191 std::accumulate(noise_psd, noise_psd + freqs_, 0.f);

181 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /	192 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /

182 (noise_power + std::numeric_limits<float>::epsilon());	193 (noise_power + std::numeric_limits<float>::epsilon());

183 if (is_active_) {	194 if (is_active_) {

184 if (snr_ > kMaxActiveSNR) {	195 if (snr_ > kMaxActiveSNR) {

	196 LOG(LS_INFO) << "Deactivating Intelligibility Enhancer.";

185 is_active_ = false;	197 is_active_ = false;

186 // Set the target gains to unity.	198 // Set the target gains to unity.

187 float* gains = gain_applier_.target();	199 float* gains = gain_applier_.target();

188 for (size_t i = 0; i < freqs_; ++i) {	200 for (size_t i = 0; i < freqs_; ++i) {

189 gains[i] = 1.f;	201 gains[i] = 1.f;

190 }	202 }

191 }	203 }

192 } else {	204 } else {

193 is_active_ = snr_ < kMinInactiveSNR;	205 if (snr_ < kMinInactiveSNR) {

	206 LOG(LS_INFO) << "Activating Intelligibility Enhancer.";

	207 is_active_ = true;

	208 }

194 }	209 }

195 }	210 }

196	211

197 void IntelligibilityEnhancer::SolveForLambda(float power_target) {	212 void IntelligibilityEnhancer::SolveForLambda(float power_target) {

198 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values	213 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values

199 const int kMaxIters = 100; // for these, based on experiments.	214 const int kMaxIters = 100; // for these, based on experiments.

200	215

201 const float reciprocal_power_target =	216 const float reciprocal_power_target =

202 1.f / (power_target + std::numeric_limits<float>::epsilon());	217 1.f / (power_target + std::numeric_limits<float>::epsilon());

203 float lambda_bot = kLambdaBot;	218 float lambda_bot = kLambdaBot;

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	359 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	360 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

346 chunks_since_voice_ = 0;	361 chunks_since_voice_ = 0;

347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	362 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

348 ++chunks_since_voice_;	363 ++chunks_since_voice_;

349 }	364 }

350 return chunks_since_voice_ < kSpeechOffsetDelay;	365 return chunks_since_voice_ < kSpeechOffsetDelay;

351 }	366 }

352	367

353 } // namespace webrtc	368 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »