OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "webrtc/modules/audio_processing/agc/histogram.h" | 11 #include "webrtc/modules/audio_processing/agc/loudness_histogram.h" |
12 | 12 |
13 #include <cmath> | 13 #include <cmath> |
14 #include <cstring> | 14 #include <cstring> |
15 | 15 |
16 #include "webrtc/modules/include/module_common_types.h" | 16 #include "webrtc/modules/include/module_common_types.h" |
17 | 17 |
18 namespace webrtc { | 18 namespace webrtc { |
19 | 19 |
20 static const double kHistBinCenters[] = { | 20 static const double kHistBinCenters[] = { |
21 7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01, | 21 7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01, |
22 1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01, | 22 1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01, |
23 2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01, | 23 2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01, |
24 3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01, | 24 3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01, |
25 5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01, | 25 5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01, |
26 1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00, | 26 1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00, |
27 1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00, | 27 1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00, |
28 2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00, | 28 2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00, |
29 4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00, | 29 4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00, |
30 7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01, | 30 7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01, |
31 1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01, | 31 1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01, |
32 2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01, | 32 2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01, |
33 3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01, | 33 3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01, |
34 6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01, | 34 6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01, |
35 1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02, | 35 1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02, |
36 1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02, | 36 1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02, |
37 2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02, | 37 2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02, |
38 4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02, | 38 4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02, |
39 8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03, | 39 8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03, |
40 1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03, | 40 1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03, |
41 2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03, | 41 2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03, |
42 3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03, | 42 3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03, |
43 6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03, | 43 6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03, |
44 1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04, | 44 1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04, |
45 1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04, | 45 1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04, |
46 3.00339145144454e+04, 3.56647189489147e+04}; | 46 3.00339145144454e+04, 3.56647189489147e+04}; |
47 | 47 |
48 static const double kProbQDomain = 1024.0; | 48 static const double kProbQDomain = 1024.0; |
49 // Loudness of -15 dB (smallest expected loudness) in log domain, | 49 // Loudness of -15 dB (smallest expected loudness) in log domain, |
50 // loudness_db = 13.5 * log10(rms); | 50 // loudness_db = 13.5 * log10(rms); |
51 static const double kLogDomainMinBinCenter = -2.57752062648587; | 51 static const double kLogDomainMinBinCenter = -2.57752062648587; |
52 // Loudness step of 1 dB in log domain | 52 // Loudness step of 1 dB in log domain |
53 static const double kLogDomainStepSizeInverse = 5.81954605750359; | 53 static const double kLogDomainStepSizeInverse = 5.81954605750359; |
54 | 54 |
55 static const int kTransientWidthThreshold = 7; | 55 static const int kTransientWidthThreshold = 7; |
56 static const double kLowProbabilityThreshold = 0.2; | 56 static const double kLowProbabilityThreshold = 0.2; |
57 | 57 |
58 static const int kLowProbThresholdQ10 = static_cast<int>( | 58 static const int kLowProbThresholdQ10 = |
59 kLowProbabilityThreshold * kProbQDomain); | 59 static_cast<int>(kLowProbabilityThreshold * kProbQDomain); |
60 | 60 |
61 Histogram::Histogram() | 61 LoudnessHistogram::LoudnessHistogram() |
62 : num_updates_(0), | 62 : num_updates_(0), |
63 audio_content_q10_(0), | 63 audio_content_q10_(0), |
64 bin_count_q10_(), | 64 bin_count_q10_(), |
65 activity_probability_(), | 65 activity_probability_(), |
66 hist_bin_index_(), | 66 hist_bin_index_(), |
67 buffer_index_(0), | 67 buffer_index_(0), |
68 buffer_is_full_(false), | 68 buffer_is_full_(false), |
69 len_circular_buffer_(0), | 69 len_circular_buffer_(0), |
70 len_high_activity_(0) { | 70 len_high_activity_(0) { |
71 static_assert( | 71 static_assert( |
72 kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]), | 72 kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]), |
73 "histogram bin centers incorrect size"); | 73 "histogram bin centers incorrect size"); |
74 } | 74 } |
75 | 75 |
76 Histogram::Histogram(int window_size) | 76 LoudnessHistogram::LoudnessHistogram(int window_size) |
77 : num_updates_(0), | 77 : num_updates_(0), |
78 audio_content_q10_(0), | 78 audio_content_q10_(0), |
79 bin_count_q10_(), | 79 bin_count_q10_(), |
80 activity_probability_(new int[window_size]), | 80 activity_probability_(new int[window_size]), |
81 hist_bin_index_(new int[window_size]), | 81 hist_bin_index_(new int[window_size]), |
82 buffer_index_(0), | 82 buffer_index_(0), |
83 buffer_is_full_(false), | 83 buffer_is_full_(false), |
84 len_circular_buffer_(window_size), | 84 len_circular_buffer_(window_size), |
85 len_high_activity_(0) {} | 85 len_high_activity_(0) {} |
86 | 86 |
87 Histogram::~Histogram() {} | 87 LoudnessHistogram::~LoudnessHistogram() {} |
88 | 88 |
89 void Histogram::Update(double rms, double activity_probaility) { | 89 void LoudnessHistogram::Update(double rms, double activity_probaility) { |
90 // If circular histogram is activated then remove the oldest entry. | 90 // If circular histogram is activated then remove the oldest entry. |
91 if (len_circular_buffer_ > 0) | 91 if (len_circular_buffer_ > 0) |
92 RemoveOldestEntryAndUpdate(); | 92 RemoveOldestEntryAndUpdate(); |
93 | 93 |
94 // Find the corresponding bin. | 94 // Find the corresponding bin. |
95 int hist_index = GetBinIndex(rms); | 95 int hist_index = GetBinIndex(rms); |
96 // To Q10 domain. | 96 // To Q10 domain. |
97 int prob_q10 = static_cast<int16_t>(floor(activity_probaility * | 97 int prob_q10 = |
98 kProbQDomain)); | 98 static_cast<int16_t>(floor(activity_probaility * kProbQDomain)); |
99 InsertNewestEntryAndUpdate(prob_q10, hist_index); | 99 InsertNewestEntryAndUpdate(prob_q10, hist_index); |
100 } | 100 } |
101 | 101 |
102 // Doing nothing if buffer is not full, yet. | 102 // Doing nothing if buffer is not full, yet. |
103 void Histogram::RemoveOldestEntryAndUpdate() { | 103 void LoudnessHistogram::RemoveOldestEntryAndUpdate() { |
104 assert(len_circular_buffer_ > 0); | 104 assert(len_circular_buffer_ > 0); |
105 // Do nothing if circular buffer is not full. | 105 // Do nothing if circular buffer is not full. |
106 if (!buffer_is_full_) | 106 if (!buffer_is_full_) |
107 return; | 107 return; |
108 | 108 |
109 int oldest_prob = activity_probability_[buffer_index_]; | 109 int oldest_prob = activity_probability_[buffer_index_]; |
110 int oldest_hist_index = hist_bin_index_[buffer_index_]; | 110 int oldest_hist_index = hist_bin_index_[buffer_index_]; |
111 UpdateHist(-oldest_prob, oldest_hist_index); | 111 UpdateHist(-oldest_prob, oldest_hist_index); |
112 } | 112 } |
113 | 113 |
114 void Histogram::RemoveTransient() { | 114 void LoudnessHistogram::RemoveTransient() { |
115 // Don't expect to be here if high-activity region is longer than | 115 // Don't expect to be here if high-activity region is longer than |
116 // |kTransientWidthThreshold| or there has not been any transient. | 116 // |kTransientWidthThreshold| or there has not been any transient. |
117 assert(len_high_activity_ <= kTransientWidthThreshold); | 117 assert(len_high_activity_ <= kTransientWidthThreshold); |
118 int index = (buffer_index_ > 0) ? (buffer_index_ - 1) : | 118 int index = |
119 len_circular_buffer_ - 1; | 119 (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1; |
120 while (len_high_activity_ > 0) { | 120 while (len_high_activity_ > 0) { |
121 UpdateHist(-activity_probability_[index], hist_bin_index_[index]); | 121 UpdateHist(-activity_probability_[index], hist_bin_index_[index]); |
122 activity_probability_[index] = 0; | 122 activity_probability_[index] = 0; |
123 index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1); | 123 index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1); |
124 len_high_activity_--; | 124 len_high_activity_--; |
125 } | 125 } |
126 } | 126 } |
127 | 127 |
128 void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10, | 128 void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10, |
129 int hist_index) { | 129 int hist_index) { |
130 // Update the circular buffer if it is enabled. | 130 // Update the circular buffer if it is enabled. |
131 if (len_circular_buffer_ > 0) { | 131 if (len_circular_buffer_ > 0) { |
132 // Removing transient. | 132 // Removing transient. |
133 if (activity_prob_q10 <= kLowProbThresholdQ10) { | 133 if (activity_prob_q10 <= kLowProbThresholdQ10) { |
134 // Lower than threshold probability, set it to zero. | 134 // Lower than threshold probability, set it to zero. |
135 activity_prob_q10 = 0; | 135 activity_prob_q10 = 0; |
136 // Check if this has been a transient. | 136 // Check if this has been a transient. |
137 if (len_high_activity_ <= kTransientWidthThreshold) | 137 if (len_high_activity_ <= kTransientWidthThreshold) |
138 RemoveTransient(); // Remove this transient. | 138 RemoveTransient(); // Remove this transient. |
139 len_high_activity_ = 0; | 139 len_high_activity_ = 0; |
(...skipping 11 matching lines...) Expand all Loading... |
151 } | 151 } |
152 } | 152 } |
153 | 153 |
154 num_updates_++; | 154 num_updates_++; |
155 if (num_updates_ < 0) | 155 if (num_updates_ < 0) |
156 num_updates_--; | 156 num_updates_--; |
157 | 157 |
158 UpdateHist(activity_prob_q10, hist_index); | 158 UpdateHist(activity_prob_q10, hist_index); |
159 } | 159 } |
160 | 160 |
161 void Histogram::UpdateHist(int activity_prob_q10, int hist_index) { | 161 void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) { |
162 bin_count_q10_[hist_index] += activity_prob_q10; | 162 bin_count_q10_[hist_index] += activity_prob_q10; |
163 audio_content_q10_ += activity_prob_q10; | 163 audio_content_q10_ += activity_prob_q10; |
164 } | 164 } |
165 | 165 |
166 double Histogram::AudioContent() const { | 166 double LoudnessHistogram::AudioContent() const { |
167 return audio_content_q10_ / kProbQDomain; | 167 return audio_content_q10_ / kProbQDomain; |
168 } | 168 } |
169 | 169 |
170 Histogram* Histogram::Create() { | 170 LoudnessHistogram* LoudnessHistogram::Create() { |
171 return new Histogram; | 171 return new LoudnessHistogram; |
172 } | 172 } |
173 | 173 |
174 Histogram* Histogram::Create(int window_size) { | 174 LoudnessHistogram* LoudnessHistogram::Create(int window_size) { |
175 if (window_size < 0) | 175 if (window_size < 0) |
176 return NULL; | 176 return NULL; |
177 return new Histogram(window_size); | 177 return new LoudnessHistogram(window_size); |
178 } | 178 } |
179 | 179 |
180 void Histogram::Reset() { | 180 void LoudnessHistogram::Reset() { |
181 // Reset the histogram, audio-content and number of updates. | 181 // Reset the histogram, audio-content and number of updates. |
182 memset(bin_count_q10_, 0, sizeof(bin_count_q10_)); | 182 memset(bin_count_q10_, 0, sizeof(bin_count_q10_)); |
183 audio_content_q10_ = 0; | 183 audio_content_q10_ = 0; |
184 num_updates_ = 0; | 184 num_updates_ = 0; |
185 // Empty the circular buffer. | 185 // Empty the circular buffer. |
186 buffer_index_ = 0; | 186 buffer_index_ = 0; |
187 buffer_is_full_ = false; | 187 buffer_is_full_ = false; |
188 len_high_activity_ = 0; | 188 len_high_activity_ = 0; |
189 } | 189 } |
190 | 190 |
191 int Histogram::GetBinIndex(double rms) { | 191 int LoudnessHistogram::GetBinIndex(double rms) { |
192 // First exclude overload cases. | 192 // First exclude overload cases. |
193 if (rms <= kHistBinCenters[0]) { | 193 if (rms <= kHistBinCenters[0]) { |
194 return 0; | 194 return 0; |
195 } else if (rms >= kHistBinCenters[kHistSize - 1]) { | 195 } else if (rms >= kHistBinCenters[kHistSize - 1]) { |
196 return kHistSize - 1; | 196 return kHistSize - 1; |
197 } else { | 197 } else { |
198 // The quantizer is uniform in log domain. Alternatively we could do binary | 198 // The quantizer is uniform in log domain. Alternatively we could do binary |
199 // search in linear domain. | 199 // search in linear domain. |
200 double rms_log = log(rms); | 200 double rms_log = log(rms); |
201 | 201 |
202 int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) * | 202 int index = static_cast<int>( |
203 kLogDomainStepSizeInverse)); | 203 floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse)); |
204 // The final decision is in linear domain. | 204 // The final decision is in linear domain. |
205 double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]); | 205 double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]); |
206 if (rms > b) { | 206 if (rms > b) { |
207 return index + 1; | 207 return index + 1; |
208 } | 208 } |
209 return index; | 209 return index; |
210 } | 210 } |
211 } | 211 } |
212 | 212 |
213 double Histogram::CurrentRms() const { | 213 double LoudnessHistogram::CurrentRms() const { |
214 double p; | 214 double p; |
215 double mean_val = 0; | 215 double mean_val = 0; |
216 if (audio_content_q10_ > 0) { | 216 if (audio_content_q10_ > 0) { |
217 double p_total_inverse = 1. / static_cast<double>(audio_content_q10_); | 217 double p_total_inverse = 1. / static_cast<double>(audio_content_q10_); |
218 for (int n = 0; n < kHistSize; n++) { | 218 for (int n = 0; n < kHistSize; n++) { |
219 p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse; | 219 p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse; |
220 mean_val += p * kHistBinCenters[n]; | 220 mean_val += p * kHistBinCenters[n]; |
221 } | 221 } |
222 } else { | 222 } else { |
223 mean_val = kHistBinCenters[0]; | 223 mean_val = kHistBinCenters[0]; |
224 } | 224 } |
225 return mean_val; | 225 return mean_val; |
226 } | 226 } |
227 | 227 |
228 } // namespace webrtc | 228 } // namespace webrtc |
OLD | NEW |