webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc - Issue 1766383002: Convert IntelligibilityEnhancer to multi-threaded mode

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc

Issue 1766383002: Convert IntelligibilityEnhancer to multi-threaded mode (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Another way to suppress warning Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff |

« webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('K') | « webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 43 matching lines...) Loading...
54 float* result) {	54 float* result) {

55 for (size_t i = 0; i < filter_bank.size(); ++i) {	55 for (size_t i = 0; i < filter_bank.size(); ++i) {

56 RTC_DCHECK_GT(filter_bank[i].size(), 0u);	56 RTC_DCHECK_GT(filter_bank[i].size(), 0u);

57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());	57 result[i] = DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());

58 }	58 }

59 }	59 }

60	60

61 } // namespace	61 } // namespace

62	62

63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,	63 IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,

64 size_t num_render_channels)	64 size_t num_render_channels,

	65 size_t num_noise_bins)

65 : freqs_(RealFourier::ComplexLength(	66 : freqs_(RealFourier::ComplexLength(

66 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),	67 RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),

	68 num_noise_bins_(num_noise_bins),

67 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),	69 chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),

68 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),	70 bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),

69 sample_rate_hz_(sample_rate_hz),	71 sample_rate_hz_(sample_rate_hz),

70 num_render_channels_(num_render_channels),	72 num_render_channels_(num_render_channels),

71 clear_power_estimator_(freqs_, kDecayRate),	73 clear_power_estimator_(freqs_, kDecayRate),

72 noise_power_estimator_(	74 noise_power_estimator_(freqs_, kDecayRate),

73 new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),

74 filtered_clear_pow_(bank_size_, 0.f),	75 filtered_clear_pow_(bank_size_, 0.f),

75 filtered_noise_pow_(bank_size_, 0.f),	76 filtered_noise_pow_(num_noise_bins, 0.f),

76 center_freqs_(bank_size_),	77 center_freqs_(bank_size_),

	78 capture_filter_bank_(CreateErbBank(num_noise_bins)),

77 render_filter_bank_(CreateErbBank(freqs_)),	79 render_filter_bank_(CreateErbBank(freqs_)),

78 gains_eq_(bank_size_),	80 gains_eq_(bank_size_),

79 gain_applier_(freqs_, kMaxRelativeGainChange),	81 gain_applier_(freqs_, kMaxRelativeGainChange),

80 audio_s16_(chunk_length_),	82 audio_s16_(chunk_length_),

81 chunks_since_voice_(kSpeechOffsetDelay),	83 chunks_since_voice_(kSpeechOffsetDelay),

82 is_speech_(false) {	84 is_speech_(false),

	85 noise_estimation_buffer_(num_noise_bins) {

83 RTC_DCHECK_LE(kRho, 1.f);	86 RTC_DCHECK_LE(kRho, 1.f);

84	87

85 const size_t erb_index = static_cast<size_t>(	88 const size_t erb_index = static_cast<size_t>(

86 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +	89 ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +

87 43.f));	90 43.f));

88 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);	91 start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

89	92

90 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));	93 size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));

91 std::vector<float> kbd_window(window_size);	94 std::vector<float> kbd_window(window_size);

92 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,	95 WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,

93 kbd_window.data());	96 kbd_window.data());

94 render_mangler_.reset(new LappedTransform(	97 render_mangler_.reset(new LappedTransform(

95 num_render_channels_, num_render_channels_, chunk_length_,	98 num_render_channels_, num_render_channels_, chunk_length_,

96 kbd_window.data(), window_size, window_size / 2, this));	99 kbd_window.data(), window_size, window_size / 2, this));

	100

	101 std::vector<float> template_queue_element(num_noise_bins);

	102 noise_estimation_queue_.reset(
	the sun 2016/03/09 09:02:55: Can you make do without the scoped_ptr, given this is instantiated in the ctor? If you decide you must have the extra indirection, use a unique_ptr instead.
	aluebs-webrtc 2016/03/09 12:18:50: Good point. Done.
	103 new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(

	104 kMaxNumNoiseEstimatesToBuffer, template_queue_element,

	105 RenderQueueItemVerifier<float>(num_noise_bins)));

97 }	106 }

98	107

99 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(	108 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(

100 std::vector<float> noise) {	109 std::vector<float> noise) {

101 if (capture_filter_bank_.size() != bank_size_ ||	110 RTC_DCHECK_EQ(noise.size(), num_noise_bins_);

102 capture_filter_bank_[0].size() != noise.size()) {	111 // Disregarding return value since buffer overflow is acceptable, because it

103 capture_filter_bank_ = CreateErbBank(noise.size());	112 // is not critical to get each noise estimate.

104 noise_power_estimator_.reset(	113 if(noise_estimation_queue_->Insert(&noise)) {};
	the sun 2016/03/09 09:02:55: Something missing here?
	aluebs-webrtc 2016/03/09 12:18:50: No, that is a way of making all compilers happy so they do not complain about the warn_unused_result warning (casting to void didn't do the trick for Android); the comment above tries to explain this.
	the sun 2016/03/09 12:28:53: Ah, I need to learn how to read. :) nit: space between if and (
	aluebs-webrtc 2016/03/09 12:58:16: Done.
105 new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate));

106 }

107 noise_power_estimator_->Step(noise.data());

108 }	114 }

109	115

110 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,	116 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,

111 int sample_rate_hz,	117 int sample_rate_hz,

112 size_t num_channels) {	118 size_t num_channels) {

113 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);	119 RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);

114 RTC_CHECK_EQ(num_render_channels_, num_channels);	120 RTC_CHECK_EQ(num_render_channels_, num_channels);

	121 while (noise_estimation_queue_->Remove(&noise_estimation_buffer_)) {

	122 noise_power_estimator_.Step(noise_estimation_buffer_.data());

	123 }

115 is_speech_ = IsSpeech(audio[0]);	124 is_speech_ = IsSpeech(audio[0]);

116 render_mangler_->ProcessChunk(audio, audio);	125 render_mangler_->ProcessChunk(audio, audio);

117 }	126 }

118	127

119 void IntelligibilityEnhancer::ProcessAudioBlock(	128 void IntelligibilityEnhancer::ProcessAudioBlock(

120 const std::complex<float>* const* in_block,	129 const std::complex<float>* const* in_block,

121 size_t in_channels,	130 size_t in_channels,

122 size_t frames,	131 size_t frames,

123 size_t /* out_channels */,	132 size_t /* out_channels */,

124 std::complex<float>* const* out_block) {	133 std::complex<float>* const* out_block) {

125 RTC_DCHECK_EQ(freqs_, frames);	134 RTC_DCHECK_EQ(freqs_, frames);

126 if (is_speech_) {	135 if (is_speech_) {

127 clear_power_estimator_.Step(in_block[0]);	136 clear_power_estimator_.Step(in_block[0]);

128 }	137 }

129 const std::vector<float>& clear_power = clear_power_estimator_.power();	138 const std::vector<float>& clear_power = clear_power_estimator_.power();

130 const std::vector<float>& noise_power = noise_power_estimator_->power();	139 const std::vector<float>& noise_power = noise_power_estimator_.power();

131 MapToErbBands(clear_power.data(), render_filter_bank_,	140 MapToErbBands(clear_power.data(), render_filter_bank_,

132 filtered_clear_pow_.data());	141 filtered_clear_pow_.data());

133 MapToErbBands(noise_power.data(), capture_filter_bank_,	142 MapToErbBands(noise_power.data(), capture_filter_bank_,

134 filtered_noise_pow_.data());	143 filtered_noise_pow_.data());

135 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());	144 SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());

136 const float power_target =	145 const float power_target =

137 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f);	146 std::accumulate(clear_power.data(), clear_power.data() + freqs_, 0.f);

138 const float power_top =	147 const float power_top =

139 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);	148 DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);

140 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());	149 SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());

(...skipping 158 matching lines...) Loading...
299 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);	308 vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);

300 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {	309 if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {

301 chunks_since_voice_ = 0;	310 chunks_since_voice_ = 0;

302 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {	311 } else if (chunks_since_voice_ < kSpeechOffsetDelay) {

303 ++chunks_since_voice_;	312 ++chunks_since_voice_;

304 }	313 }

305 return chunks_since_voice_ < kSpeechOffsetDelay;	314 return chunks_since_voice_ < kSpeechOffsetDelay;

306 }	315 }

307	316

308 } // namespace webrtc	317 } // namespace webrtc

OLD	NEW