webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h - Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 2320833002: Compensate for the IntelligibilityEnhancer processing delay in high bands (Closed)

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « webrtc/modules/audio_processing/audio_processing_impl.cc ('k') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

13	13

14 #include <complex>	14 #include <complex>

15 #include <memory>	15 #include <memory>

16 #include <vector>	16 #include <vector>

17	17

18 #include "webrtc/base/swap_queue.h"	18 #include "webrtc/base/swap_queue.h"

	19 #include "webrtc/common_audio/audio_ring_buffer.h"

	20 #include "webrtc/common_audio/channel_buffer.h"

19 #include "webrtc/common_audio/lapped_transform.h"	21 #include "webrtc/common_audio/lapped_transform.h"

20 #include "webrtc/common_audio/channel_buffer.h"	22 #include "webrtc/modules/audio_processing/audio_buffer.h"

21 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"	23 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

22 #include "webrtc/modules/audio_processing/render_queue_item_verifier.h"	24 #include "webrtc/modules/audio_processing/render_queue_item_verifier.h"

23 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"	25 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

24	26

25 namespace webrtc {	27 namespace webrtc {

26	28

27 // Speech intelligibility enhancement module. Reads render and capture	29 // Speech intelligibility enhancement module. Reads render and capture

28 // audio streams and modifies the render stream with a set of gains per	30 // audio streams and modifies the render stream with a set of gains per

29 // frequency bin to enhance speech against the noise background.	31 // frequency bin to enhance speech against the noise background.

30 // Details of the model and algorithm can be found in the original paper:	32 // Details of the model and algorithm can be found in the original paper:

31 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788	33 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788

32 class IntelligibilityEnhancer : public LappedTransform::Callback {	34 class IntelligibilityEnhancer : public LappedTransform::Callback {

33 public:	35 public:

34 IntelligibilityEnhancer(int sample_rate_hz,	36 IntelligibilityEnhancer(int sample_rate_hz,

35 size_t num_render_channels,	37 size_t num_render_channels,

36 size_t num_noise_bins);	38 size_t num_noise_bins);

37	39

38 ~IntelligibilityEnhancer() override;	40 ~IntelligibilityEnhancer() override;

39	41

40 // Sets the capture noise magnitude spectrum estimate.	42 // Sets the capture noise magnitude spectrum estimate.

41 void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);	43 void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);

42	44

43 // Reads chunk of speech in time domain and updates with modified signal.	45 // Reads chunk of speech in time domain and updates with modified signal.

44 void ProcessRenderAudio(float* const* audio,	46 void ProcessRenderAudio(AudioBuffer* audio, int sample_rate_hz);

45 int sample_rate_hz,

46 size_t num_channels);

47 bool active() const;	47 bool active() const;

48	48

49 protected:	49 protected:

50 // All in frequency domain, receives input |in_block|, applies	50 // All in frequency domain, receives input |in_block|, applies

51 // intelligibility enhancement, and writes result to |out_block|.	51 // intelligibility enhancement, and writes result to |out_block|.

52 void ProcessAudioBlock(const std::complex<float>* const* in_block,	52 void ProcessAudioBlock(const std::complex<float>* const* in_block,

53 size_t in_channels,	53 size_t in_channels,

54 size_t frames,	54 size_t frames,

55 size_t out_channels,	55 size_t out_channels,

56 std::complex<float>* const* out_block) override;	56 std::complex<float>* const* out_block) override;

(...skipping 20 matching lines...) Expand all Loading...
77 // Initializes ERB filterbank.	77 // Initializes ERB filterbank.

78 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);	78 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);

79	79

80 // Analytically solves quadratic for optimal gains given |lambda|.	80 // Analytically solves quadratic for optimal gains given |lambda|.

81 // Negative gains are set to 0. Stores the results in |sols|.	81 // Negative gains are set to 0. Stores the results in |sols|.

82 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);	82 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

83	83

84 // Returns true if the audio is speech.	84 // Returns true if the audio is speech.

85 bool IsSpeech(const float* audio);	85 bool IsSpeech(const float* audio);

86	86

	87 // Delays the high bands to compensate for the processing delay in the low

	88 // band.

	89 void DelayHighBands(AudioBuffer* audio);

	90

87 static const size_t kMaxNumNoiseEstimatesToBuffer = 5;	91 static const size_t kMaxNumNoiseEstimatesToBuffer = 5;

88	92

89 const size_t freqs_; // Num frequencies in frequency domain.	93 const size_t freqs_; // Num frequencies in frequency domain.

90 const size_t num_noise_bins_;	94 const size_t num_noise_bins_;

91 const size_t chunk_length_; // Chunk size in samples.	95 const size_t chunk_length_; // Chunk size in samples.

92 const size_t bank_size_; // Num ERB filters.	96 const size_t bank_size_; // Num ERB filters.

93 const int sample_rate_hz_;	97 const int sample_rate_hz_;

94 const size_t num_render_channels_;	98 const size_t num_render_channels_;

95	99

96 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;	100 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;

(...skipping 16 matching lines...) Expand all Loading...
113 bool is_speech_;	117 bool is_speech_;

114 float snr_;	118 float snr_;

115 bool is_active_;	119 bool is_active_;

116	120

117 unsigned long int num_chunks_;	121 unsigned long int num_chunks_;

118 unsigned long int num_active_chunks_;	122 unsigned long int num_active_chunks_;

119	123

120 std::vector<float> noise_estimation_buffer_;	124 std::vector<float> noise_estimation_buffer_;

121 SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>	125 SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>

122 noise_estimation_queue_;	126 noise_estimation_queue_;

	127

	128 std::vector<std::unique_ptr<AudioRingBuffer>> high_bands_buffers_;

123 };	129 };

124	130

125 } // namespace webrtc	131 } // namespace webrtc

126	132

127 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_	133 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

OLD	NEW