webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 2104273002: Add logging to Intelligibility Enhancer

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 2104273002: Add logging to Intelligibility Enhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Log num_chunks_ when activating and deactivating Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"	11 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

12	12

13 #include <math.h>	13 #include <math.h>

14 #include <stdlib.h>	14 #include <stdlib.h>

15 #include <algorithm>	15 #include <algorithm>

16 #include <limits>	16 #include <limits>

17 #include <numeric>	17 #include <numeric>

18	18

19 #include "webrtc/base/checks.h"	19 #include "webrtc/base/checks.h"

	20 #include "webrtc/base/logging.h"

20 #include "webrtc/common_audio/include/audio_util.h"	21 #include "webrtc/common_audio/include/audio_util.h"

21 #include "webrtc/common_audio/window_generator.h"	22 #include "webrtc/common_audio/window_generator.h"

22	23

23 namespace webrtc {	24 namespace webrtc {

24	25

25 namespace {	26 namespace {

26	27

27 const size_t kErbResolution = 2;	28 const size_t kErbResolution = 2;

28 const int kWindowSizeMs = 16;	29 const int kWindowSizeMs = 16;

29 const int kChunkSizeMs = 10; // Size provided by APM.	30 const int kChunkSizeMs = 10; // Size provided by APM.

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
83 capture_filter_bank_(CreateErbBank(num_noise_bins)),	84 capture_filter_bank_(CreateErbBank(num_noise_bins)),

84 render_filter_bank_(CreateErbBank(freqs_)),	85 render_filter_bank_(CreateErbBank(freqs_)),

85 gains_eq_(bank_size_),	86 gains_eq_(bank_size_),

86 gain_applier_(freqs_, kMaxRelativeGainChange),	87 gain_applier_(freqs_, kMaxRelativeGainChange),

87 audio_s16_(chunk_length_),	88 audio_s16_(chunk_length_),

88 chunks_since_voice_(kSpeechOffsetDelay),	89 chunks_since_voice_(kSpeechOffsetDelay),

89 is_speech_(false),	90 is_speech_(false),

90 snr_(kMaxActiveSNR),	91 snr_(kMaxActiveSNR),

91 is_active_(false),	92 is_active_(false),

92 num_chunks_(0u),	93 num_chunks_(0u),

	94 num_active_chunks_(0u),

93 noise_estimation_buffer_(num_noise_bins),	95 noise_estimation_buffer_(num_noise_bins),

94 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,	96 noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,

95 std::vector<float>(num_noise_bins),	97 std::vector<float>(num_noise_bins),

96 RenderQueueItemVerifier<float>(num_noise_bins)) {	98 RenderQueueItemVerifier<float>(num_noise_bins)) {

97 RTC_DCHECK_LE(kRho, 1.f);	99 RTC_DCHECK_LE(kRho, 1.f);

98	100

99 const size_t erb_index = static_cast<size_t>(	101 const size_t erb_index = static_cast<size_t>(

100 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +	102 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +

101 43.f));	103 43.f));

102 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);	104 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

103	105

104 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);	106 size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);

105 std::vector<float> kbd_window(window_size);	107 std::vector<float> kbd_window(window_size);

106 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,	108 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,

107 kbd_window.data());	109 kbd_window.data());

108 render_mangler_.reset(new LappedTransform(	110 render_mangler_.reset(new LappedTransform(

109 num_render_channels_, num_render_channels_, chunk_length_,	111 num_render_channels_, num_render_channels_, chunk_length_,

110 kbd_window.data(), window_size, window_size / 2, this));	112 kbd_window.data(), window_size, window_size / 2, this));

111 }	113 }

112	114

	115 IntelligibilityEnhancer::~IntelligibilityEnhancer() {

	116 // Don't rely on this log, since the destructor isn't called when the app/tab

	117 // is killed.

	118 LOG(LS_INFO) << "Intelligibility Enhancer was active for "

	119 << static_cast<float>(num_active_chunks_) / num_chunks_

	120 << "% of the call.";

	121 }

	122

113 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(	123 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(

114 std::vector<float> noise, int gain_db) {	124 std::vector<float> noise, int gain_db) {

115 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);	125 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);

116 const float gain = std::pow(10.f, gain_db / 20.f);	126 const float gain = std::pow(10.f, gain_db / 20.f);

117 for (auto& bin : noise) {	127 for (auto& bin : noise) {

118 bin *= gain;	128 bin *= gain;

119 }	129 }

120 // Disregarding return value since buffer overflow is acceptable, because it	130 // Disregarding return value since buffer overflow is acceptable, because it

121 // is not critical to get each noise estimate.	131 // is not critical to get each noise estimate.

122 if (noise_estimation_queue_.Insert(&noise)) {	132 if (noise_estimation_queue_.Insert(&noise)) {

(...skipping 16 matching lines...) Expand all Loading...
139 const std::complex<float>* const* in_block,	149 const std::complex<float>* const* in_block,

140 size_t in_channels,	150 size_t in_channels,

141 size_t frames,	151 size_t frames,

142 size_t /* out_channels */,	152 size_t /* out_channels */,

143 std::complex<float>* const* out_block) {	153 std::complex<float>* const* out_block) {

144 RTC_DCHECK_EQ(freqs_, frames);	154 RTC_DCHECK_EQ(freqs_, frames);

145 if (is_speech_) {	155 if (is_speech_) {

146 clear_power_estimator_.Step(in_block[0]);	156 clear_power_estimator_.Step(in_block[0]);

147 }	157 }

148 SnrBasedEffectActivation();	158 SnrBasedEffectActivation();

149 if (is_active_ && num_chunks_++ % kGainUpdatePeriod == 0) {	159 ++num_chunks_;

150 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,	160 if (is_active_) {

151 filtered_clear_pow_.data());	161 ++num_active_chunks_;

152 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,	162 if (num_chunks_ % kGainUpdatePeriod == 0) {

153 filtered_noise_pow_.data());	163 MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,

154 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());	164 filtered_clear_pow_.data());

155 const float power_target = std::accumulate(	165 MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,

156 filtered_clear_pow_.data(),	166 filtered_noise_pow_.data());

157 filtered_clear_pow_.data() + bank_size_,	167 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

158 0.f);	168 const float power_target = std::accumulate(

159 const float power_top =	169 filtered_clear_pow_.data(),

160 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	170 filtered_clear_pow_.data() + bank_size_,

161 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());	171 0.f);

162 const float power_bot =	172 const float power_top =

163 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	173 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

164 if (power_target >= power_bot && power_target <= power_top) {	174 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

165 SolveForLambda(power_target);	175 const float power_bot =

166 UpdateErbGains();	176 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

167 } // Else experiencing power underflow, so do nothing.	177 if (power_target >= power_bot && power_target <= power_top) {

	178 SolveForLambda(power_target);

	179 UpdateErbGains();

	180 } // Else experiencing power underflow, so do nothing.

	181 }

168 }	182 }

169 for (size_t i = 0; i < in_channels; ++i) {	183 for (size_t i = 0; i < in_channels; ++i) {

170 gain_applier_.Apply(in_block[i], out_block[i]);	184 gain_applier_.Apply(in_block[i], out_block[i]);

171 }	185 }

172 }	186 }

173	187

174 void IntelligibilityEnhancer::SnrBasedEffectActivation() {	188 void IntelligibilityEnhancer::SnrBasedEffectActivation() {

175 const float* clear_psd = clear_power_estimator_.power().data();	189 const float* clear_psd = clear_power_estimator_.power().data();

176 const float* noise_psd = noise_power_estimator_.power().data();	190 const float* noise_psd = noise_power_estimator_.power().data();

177 const float clear_power =	191 const float clear_power =

178 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);	192 std::accumulate(clear_psd, clear_psd + freqs_, 0.f);

179 const float noise_power =	193 const float noise_power =

180 std::accumulate(noise_psd, noise_psd + freqs_, 0.f);	194 std::accumulate(noise_psd, noise_psd + freqs_, 0.f);

181 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /	195 snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /

182 (noise_power + std::numeric_limits<float>::epsilon());	196 (noise_power + std::numeric_limits<float>::epsilon());

183 if (is_active_) {	197 if (is_active_) {

184 if (snr_ > kMaxActiveSNR) {	198 if (snr_ > kMaxActiveSNR) {

	199 LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
	ivoc 2016/07/01 08:04:30: This should probably say "deactivated". — aluebs-webrtc 2016/07/01 22:11:13 (in reply to ivoc, 2016/07/01 08:04:30): Good catch. Done.
	200 << num_chunks_;

185 is_active_ = false;	201 is_active_ = false;

186 // Set the target gains to unity.	202 // Set the target gains to unity.

187 float* gains = gain_applier_.target();	203 float* gains = gain_applier_.target();

188 for (size_t i = 0; i < freqs_; ++i) {	204 for (size_t i = 0; i < freqs_; ++i) {

189 gains[i] = 1.f;	205 gains[i] = 1.f;

190 }	206 }

191 }	207 }

192 } else {	208 } else {

193 is_active_ = snr_ < kMinInactiveSNR;	209 if (snr_ < kMinInactiveSNR) {

	210 LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "

	211 << num_chunks_;

	212 is_active_ = true;

	213 }

194 }	214 }

195 }	215 }

196	216

197 void IntelligibilityEnhancer::SolveForLambda(float power_target) {	217 void IntelligibilityEnhancer::SolveForLambda(float power_target) {

198 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values	218 const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values

199 const int kMaxIters = 100; // for these, based on experiments.	219 const int kMaxIters = 100; // for these, based on experiments.

200	220

201 const float reciprocal_power_target =	221 const float reciprocal_power_target =

202 1.f / (power_target + std::numeric_limits<float>::epsilon());	222 1.f / (power_target + std::numeric_limits<float>::epsilon());

203 float lambda_bot = kLambdaBot;	223 float lambda_bot = kLambdaBot;

(...skipping 140 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
344 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	364 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

345 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	365 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

346 chunks_since_voice_ = 0;	366 chunks_since_voice_ = 0;

347 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	367 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

348 ++chunks_since_voice_;	368 ++chunks_since_voice_;

349 }	369 }

350 return chunks_since_voice_ < kSpeechOffsetDelay;	370 return chunks_since_voice_ < kSpeechOffsetDelay;

351 }	371 }

352	372

353 } // namespace webrtc	373 } // namespace webrtc

OLD	NEW

« no previous file with comments | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | no next file » | no next file with comments »