Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(829)

Side by Side Diff: webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h

Issue 1729753003: Fix the stereo support in IntelligibilityEnhancer (Closed) Base URL: https://chromium.googlesource.com/external/webrtc.git@gains2
Patch Set: Rebasing Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ 11 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ 12 #define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
13 13
14 #include <complex> 14 #include <complex>
15 #include <memory> 15 #include <memory>
16 #include <vector> 16 #include <vector>
17 17
18 #include "webrtc/common_audio/lapped_transform.h" 18 #include "webrtc/common_audio/lapped_transform.h"
19 #include "webrtc/common_audio/channel_buffer.h" 19 #include "webrtc/common_audio/channel_buffer.h"
20 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" 20 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
21 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" 21 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
22 22
23 namespace webrtc { 23 namespace webrtc {
24 24
25 // Speech intelligibility enhancement module. Reads render and capture 25 // Speech intelligibility enhancement module. Reads render and capture
26 // audio streams and modifies the render stream with a set of gains per 26 // audio streams and modifies the render stream with a set of gains per
27 // frequency bin to enhance speech against the noise background. 27 // frequency bin to enhance speech against the noise background.
28 // Details of the model and algorithm can be found in the original paper: 28 // Details of the model and algorithm can be found in the original paper:
29 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 29 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
30 class IntelligibilityEnhancer { 30 class IntelligibilityEnhancer : public LappedTransform::Callback {
31 public: 31 public:
32 IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels); 32 IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels);
33 33
34 // Sets the capture noise magnitude spectrum estimate. 34 // Sets the capture noise magnitude spectrum estimate.
35 void SetCaptureNoiseEstimate(std::vector<float> noise); 35 void SetCaptureNoiseEstimate(std::vector<float> noise);
36 36
37 // Reads chunk of speech in time domain and updates with modified signal. 37 // Reads chunk of speech in time domain and updates with modified signal.
38 void ProcessRenderAudio(float* const* audio, 38 void ProcessRenderAudio(float* const* audio,
39 int sample_rate_hz, 39 int sample_rate_hz,
40 size_t num_channels); 40 size_t num_channels);
41 bool active() const; 41 bool active() const;
42 42
43 protected:
44 // All in frequency domain, receives input |in_block|, applies
45 // intelligibility enhancement, and writes result to |out_block|.
46 void ProcessAudioBlock(const std::complex<float>* const* in_block,
47 size_t in_channels,
48 size_t frames,
49 size_t out_channels,
50 std::complex<float>* const* out_block) override;
51
43 private: 52 private:
44 // Provides access point to the frequency domain.
45 class TransformCallback : public LappedTransform::Callback {
46 public:
47 TransformCallback(IntelligibilityEnhancer* parent);
48
49 // All in frequency domain, receives input |in_block|, applies
50 // intelligibility enhancement, and writes result to |out_block|.
51 void ProcessAudioBlock(const std::complex<float>* const* in_block,
52 size_t in_channels,
53 size_t frames,
54 size_t out_channels,
55 std::complex<float>* const* out_block) override;
56
57 private:
58 IntelligibilityEnhancer* parent_;
59 };
60 friend class TransformCallback;
61 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); 53 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
62 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); 54 FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
63 55
64 // Updates power computation and analysis with |in_block_|,
65 // and writes modified speech to |out_block|.
66 void ProcessClearBlock(const std::complex<float>* in_block,
67 std::complex<float>* out_block);
68
69 // Bisection search for optimal |lambda|. 56 // Bisection search for optimal |lambda|.
70 void SolveForLambda(float power_target); 57 void SolveForLambda(float power_target);
71 58
72 // Transforms freq gains to ERB gains. 59 // Transforms freq gains to ERB gains.
73 void UpdateErbGains(); 60 void UpdateErbGains();
74 61
75 // Returns number of ERB filters. 62 // Returns number of ERB filters.
76 static size_t GetBankSize(int sample_rate, size_t erb_resolution); 63 static size_t GetBankSize(int sample_rate, size_t erb_resolution);
77 64
78 // Initializes ERB filterbank. 65 // Initializes ERB filterbank.
79 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs); 66 std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
80 67
81 // Analytically solves quadratic for optimal gains given |lambda|. 68 // Analytically solves quadratic for optimal gains given |lambda|.
82 // Negative gains are set to 0. Stores the results in |sols|. 69 // Negative gains are set to 0. Stores the results in |sols|.
83 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); 70 void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
84 71
85 // Returns true if the audio is speech. 72 // Returns true if the audio is speech.
86 bool IsSpeech(const float* audio); 73 bool IsSpeech(const float* audio);
87 74
88 const size_t freqs_; // Num frequencies in frequency domain. 75 const size_t freqs_; // Num frequencies in frequency domain.
89 const size_t chunk_length_; // Chunk size in samples. 76 const size_t chunk_length_; // Chunk size in samples.
90 const size_t bank_size_; // Num ERB filters. 77 const size_t bank_size_; // Num ERB filters.
91 const int sample_rate_hz_; 78 const int sample_rate_hz_;
92 const size_t num_render_channels_; 79 const size_t num_render_channels_;
93 80
94 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_; 81 intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
95 std::unique_ptr<intelligibility::PowerEstimator<float>> 82 std::unique_ptr<intelligibility::PowerEstimator<float>>
96 noise_power_estimator_; 83 noise_power_estimator_;
97 std::unique_ptr<float[]> filtered_clear_pow_; 84 std::vector<float> filtered_clear_pow_;
98 std::unique_ptr<float[]> filtered_noise_pow_; 85 std::vector<float> filtered_noise_pow_;
99 std::unique_ptr<float[]> center_freqs_; 86 std::vector<float> center_freqs_;
100 std::vector<std::vector<float>> capture_filter_bank_; 87 std::vector<std::vector<float>> capture_filter_bank_;
101 std::vector<std::vector<float>> render_filter_bank_; 88 std::vector<std::vector<float>> render_filter_bank_;
102 size_t start_freq_; 89 size_t start_freq_;
103 90
104 std::unique_ptr<float[]> gains_eq_; // Pre-filter modified gains. 91 std::vector<float> gains_eq_; // Pre-filter modified gains.
105 intelligibility::GainApplier gain_applier_; 92 intelligibility::GainApplier gain_applier_;
106 93
107 // Destination buffers used to reassemble blocked chunks before overwriting
108 // the original input array with modifications.
109 ChannelBuffer<float> temp_render_out_buffer_;
110
111 TransformCallback render_callback_;
112 std::unique_ptr<LappedTransform> render_mangler_; 94 std::unique_ptr<LappedTransform> render_mangler_;
113 95
114 VoiceActivityDetector vad_; 96 VoiceActivityDetector vad_;
115 std::vector<int16_t> audio_s16_; 97 std::vector<int16_t> audio_s16_;
116 size_t chunks_since_voice_; 98 size_t chunks_since_voice_;
117 bool is_speech_; 99 bool is_speech_;
118 }; 100 };
119 101
120 } // namespace webrtc 102 } // namespace webrtc
121 103
122 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ 104 #endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
OLD | NEW
« no previous file with comments | « no previous file | webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698