webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 1766383002: Convert IntelligibilityEnhancer to multi-threaded mode

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1766383002: Convert IntelligibilityEnhancer to multi-threaded mode (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@master

Patch Set: Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« webrtc/modules/audio_processing/include/mock_audio_processing.h ('K') | « webrtc/modules/audio_processing/include/mock_audio_processing.h ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

13	13

14 #include <complex>	14 #include <complex>

15 #include <memory>	15 #include <memory>

16 #include <vector>	16 #include <vector>

17	17

18 #include "webrtc/common_audio/lapped_transform.h"	18 #include "webrtc/common_audio/lapped_transform.h"

19 #include "webrtc/common_audio/channel_buffer.h"	19 #include "webrtc/common_audio/channel_buffer.h"

	20 #include "webrtc/common_audio/swap_queue.h"

20 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	21 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

	22 #include "webrtc/modules/audio_processing/processing_component.h"

21 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"	23 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

22	24

23 namespace webrtc {	25 namespace webrtc {

24	26

25 // Speech intelligibility enhancement module. Reads render and capture	27 // Speech intelligibility enhancement module. Reads render and capture

26 // audio streams and modifies the render stream with a set of gains per	28 // audio streams and modifies the render stream with a set of gains per

27 // frequency bin to enhance speech against the noise background.	29 // frequency bin to enhance speech against the noise background.

28 // Details of the model and algorithm can be found in the original paper:	30 // Details of the model and algorithm can be found in the original paper:

29 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788	31 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788

30 class IntelligibilityEnhancer : public LappedTransform::Callback {	32 class IntelligibilityEnhancer : public LappedTransform::Callback {

31 public:	33 public:

32 IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels);	34 IntelligibilityEnhancer(int sample_rate_hz,

	35 size_t num_render_channels,

	36 size_t num_noise_bins);

33	37

34 // Sets the capture noise magnitude spectrum estimate.	38 // Sets the capture noise magnitude spectrum estimate.

35 void SetCaptureNoiseEstimate(std::vector<float> noise);	39 void SetCaptureNoiseEstimate(std::vector<float> noise);

36	40

37 // Reads chunk of speech in time domain and updates with modified signal.	41 // Reads chunk of speech in time domain and updates with modified signal.

38 void ProcessRenderAudio(float* const* audio,	42 void ProcessRenderAudio(float* const* audio,

39 int sample_rate_hz,	43 int sample_rate_hz,

40 size_t num_channels);	44 size_t num_channels);

41 bool active() const;	45 bool active() const;

42	46

(...skipping 22 matching lines...) Expand all Loading...
65 // Initializes ERB filterbank.	69 // Initializes ERB filterbank.

66 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);	70 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

67	71

68 // Analytically solves quadratic for optimal gains given |lambda|.	72 // Analytically solves quadratic for optimal gains given |lambda|.

69 // Negative gains are set to 0. Stores the results in |sols|.	73 // Negative gains are set to 0. Stores the results in |sols|.

70 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);	74 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

71	75

72 // Returns true if the audio is speech.	76 // Returns true if the audio is speech.

73 bool IsSpeech(const float* audio);	77 bool IsSpeech(const float* audio);

74	78

	79 // TODO(aluebs): Decrease this once we properly handle hugely unbalanced

	80 // reverse and forward call numbers.

	81 static const size_t kMaxNumFramesToBuffer = 100;

	82
	peah-webrtc 2016/03/08 07:02:38
	The size of this buffer seems a bit high. The reason for the large sizes in the buffers for the AECs is that data must not be dropped there. But in this case that requirement is not that superimportant, right? (This is reflected in the fact that the code that inserts data into the buffer (correctly) does not take buffer overflows into account.) I would have expected 10-50 ms of data to be sufficient for the buffer, as what is passed is estimates of the stationary part of the capture noise spectrum, which should not change much (as it is stationary), which means that dropped data due to thread stalls should not be critical at all. Wdyt?

	peah-webrtc 2016/03/08 07:02:38
	Please rename, as it is not frames that are buffered, but rather noise spectra, right?

	aluebs-webrtc 2016/03/08 10:53:00
	On 2016/03/08 07:02:38, peah-webrtc wrote:
	> Please rename as it is not frames that are buffered, but rather noise spectra,
	> right?
	Done.

	aluebs-webrtc 2016/03/08 10:53:00
	On 2016/03/08 07:02:38, peah-webrtc wrote:
	> The size of this buffer seems a bit high. The reason for the large sizes in the
	> buffers for the AECs is that data must not be dropped there.
	> But in this case that requirement is not that superimportant, right? (which is
	> reflected on the fact that the code that inserts data into the buffer
	> (correctly) does not take buffer overflows into account).
	>
	> I would have expected a 10-50 ms of data to be sufficient for the buffer as what
	> is passed is estimates of the stationary part of the capture noise spectrum
	> which should not change much (as it is stationary) which means that dropped data
	> due to thread stalls should not at all be critical.
	>
	> Wdyt?
	That makes a lot of sense. I just copied and pasted without much thought on the actual value.
75 const size_t freqs_; // Num frequencies in frequency domain.	83 const size_t freqs_; // Num frequencies in frequency domain.

	84 const size_t num_noise_bins_;

76 const size_t chunk_length_; // Chunk size in samples.	85 const size_t chunk_length_; // Chunk size in samples.

77 const size_t bank_size_; // Num ERB filters.	86 const size_t bank_size_; // Num ERB filters.

78 const int sample_rate_hz_;	87 const int sample_rate_hz_;

79 const size_t num_render_channels_;	88 const size_t num_render_channels_;

80	89

81 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;	90 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;

82 std::unique_ptr<intelligibility::PowerEstimator<float>>	91 intelligibility::PowerEstimator<float> noise_power_estimator_;

83 noise_power_estimator_;

84 std::vector<float> filtered_clear_pow_;	92 std::vector<float> filtered_clear_pow_;

85 std::vector<float> filtered_noise_pow_;	93 std::vector<float> filtered_noise_pow_;

86 std::vector<float> center_freqs_;	94 std::vector<float> center_freqs_;

87 std::vector<std::vector<float>> capture_filter_bank_;	95 std::vector<std::vector<float>> capture_filter_bank_;

88 std::vector<std::vector<float>> render_filter_bank_;	96 std::vector<std::vector<float>> render_filter_bank_;

89 size_t start_freq_;	97 size_t start_freq_;

90	98

91 std::vector<float> gains_eq_; // Pre-filter modified gains.	99 std::vector<float> gains_eq_; // Pre-filter modified gains.

92 intelligibility::GainApplier gain_applier_;	100 intelligibility::GainApplier gain_applier_;

93	101

94 std::unique_ptr<LappedTransform> render_mangler_;	102 std::unique_ptr<LappedTransform> render_mangler_;

95	103

96 VoiceActivityDetector vad_;	104 VoiceActivityDetector vad_;

97 std::vector<int16_t> audio_s16_;	105 std::vector<int16_t> audio_s16_;

98 size_t chunks_since_voice_;	106 size_t chunks_since_voice_;

99 bool is_speech_;	107 bool is_speech_;

	108

	109 std::vector<float> noise_estimation_buffer_;

	110 rtc::scoped_ptr<SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>>

	111 noise_estimation_queue_;

100 };	112 };

101	113

102 } // namespace webrtc	114 } // namespace webrtc

103	115

104 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	116 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW