OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 #include "webrtc/modules/video_processing/content_analysis.h" | |
11 | |
12 #include <math.h> | |
13 #include <stdlib.h> | |
14 | |
15 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" | |
16 | |
17 namespace webrtc { | |
18 | |
// Constructs the analyzer with all state cleared. The portable C metric
// kernels are installed by default; when |runtime_cpu_detection| is true and
// the build targets the x86 family, SSE2 kernels are selected at runtime via
// member-function pointers if the CPU reports SSE2 support.
VPMContentAnalysis::VPMContentAnalysis(bool runtime_cpu_detection)
    : orig_frame_(NULL),
      prev_frame_(NULL),
      width_(0),
      height_(0),
      skip_num_(1),
      border_(8),
      motion_magnitude_(0.0f),
      spatial_pred_err_(0.0f),
      spatial_pred_err_h_(0.0f),
      spatial_pred_err_v_(0.0f),
      first_frame_(true),
      ca_Init_(false),
      content_metrics_(NULL) {
  // Default to the plain C implementations; they work on every target.
  ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_C;
  TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_C;

  if (runtime_cpu_detection) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    if (WebRtc_GetCPUInfo(kSSE2)) {
      ComputeSpatialMetrics = &VPMContentAnalysis::ComputeSpatialMetrics_SSE2;
      TemporalDiffMetric = &VPMContentAnalysis::TemporalDiffMetric_SSE2;
    }
#endif
  }
  // Reset dimensions/flags to a known-clean state. Both owned pointers are
  // still NULL here, so Release() frees nothing.
  Release();
}
46 | |
// Frees the owned metrics object and the cached previous-frame buffer.
VPMContentAnalysis::~VPMContentAnalysis() {
  Release();
}
50 | |
51 VideoContentMetrics* VPMContentAnalysis::ComputeContentMetrics( | |
52 const VideoFrame& inputFrame) { | |
53 if (inputFrame.IsZeroSize()) | |
54 return NULL; | |
55 | |
56 // Init if needed (native dimension change). | |
57 if (width_ != inputFrame.width() || height_ != inputFrame.height()) { | |
58 if (VPM_OK != Initialize(inputFrame.width(), inputFrame.height())) | |
59 return NULL; | |
60 } | |
61 // Only interested in the Y plane. | |
62 orig_frame_ = inputFrame.buffer(kYPlane); | |
63 | |
64 // Compute spatial metrics: 3 spatial prediction errors. | |
65 (this->*ComputeSpatialMetrics)(); | |
66 | |
67 // Compute motion metrics | |
68 if (first_frame_ == false) | |
69 ComputeMotionMetrics(); | |
70 | |
71 // Saving current frame as previous one: Y only. | |
72 memcpy(prev_frame_, orig_frame_, width_ * height_); | |
73 | |
74 first_frame_ = false; | |
75 ca_Init_ = true; | |
76 | |
77 return ContentMetrics(); | |
78 } | |
79 | |
80 int32_t VPMContentAnalysis::Release() { | |
81 if (content_metrics_ != NULL) { | |
82 delete content_metrics_; | |
83 content_metrics_ = NULL; | |
84 } | |
85 | |
86 if (prev_frame_ != NULL) { | |
87 delete[] prev_frame_; | |
88 prev_frame_ = NULL; | |
89 } | |
90 | |
91 width_ = 0; | |
92 height_ = 0; | |
93 first_frame_ = true; | |
94 | |
95 return VPM_OK; | |
96 } | |
97 | |
98 int32_t VPMContentAnalysis::Initialize(int width, int height) { | |
99 width_ = width; | |
100 height_ = height; | |
101 first_frame_ = true; | |
102 | |
103 // skip parameter: # of skipped rows: for complexity reduction | |
104 // temporal also currently uses it for column reduction. | |
105 skip_num_ = 1; | |
106 | |
107 // use skipNum = 2 for 4CIF, WHD | |
108 if ((height_ >= 576) && (width_ >= 704)) { | |
109 skip_num_ = 2; | |
110 } | |
111 // use skipNum = 4 for FULLL_HD images | |
112 if ((height_ >= 1080) && (width_ >= 1920)) { | |
113 skip_num_ = 4; | |
114 } | |
115 | |
116 if (content_metrics_ != NULL) { | |
117 delete content_metrics_; | |
118 } | |
119 | |
120 if (prev_frame_ != NULL) { | |
121 delete[] prev_frame_; | |
122 } | |
123 | |
124 // Spatial Metrics don't work on a border of 8. Minimum processing | |
125 // block size is 16 pixels. So make sure the width and height support this. | |
126 if (width_ <= 32 || height_ <= 32) { | |
127 ca_Init_ = false; | |
128 return VPM_PARAMETER_ERROR; | |
129 } | |
130 | |
131 content_metrics_ = new VideoContentMetrics(); | |
132 if (content_metrics_ == NULL) { | |
133 return VPM_MEMORY; | |
134 } | |
135 | |
136 prev_frame_ = new uint8_t[width_ * height_]; // Y only. | |
137 if (prev_frame_ == NULL) | |
138 return VPM_MEMORY; | |
139 | |
140 return VPM_OK; | |
141 } | |
142 | |
// Compute motion metrics: magnitude over non-zero motion vectors,
// and size of zero cluster
// Currently only one motion metric is produced: the normalized mean absolute
// temporal difference (MAD), computed by whichever TemporalDiffMetric kernel
// (C or SSE2) was selected at construction. Always returns VPM_OK.
int32_t VPMContentAnalysis::ComputeMotionMetrics() {
  // Motion metrics: only one is derived from normalized
  // (MAD) temporal difference
  (this->*TemporalDiffMetric)();
  return VPM_OK;
}
151 | |
152 // Normalized temporal difference (MAD): used as a motion level metric | |
153 // Normalize MAD by spatial contrast: images with more contrast | |
154 // (pixel variance) likely have larger temporal difference | |
155 // To reduce complexity, we compute the metric for a reduced set of points. | |
156 int32_t VPMContentAnalysis::TemporalDiffMetric_C() { | |
157 // size of original frame | |
158 int sizei = height_; | |
159 int sizej = width_; | |
160 uint32_t tempDiffSum = 0; | |
161 uint32_t pixelSum = 0; | |
162 uint64_t pixelSqSum = 0; | |
163 | |
164 uint32_t num_pixels = 0; // Counter for # of pixels. | |
165 const int width_end = ((width_ - 2 * border_) & -16) + border_; | |
166 | |
167 for (int i = border_; i < sizei - border_; i += skip_num_) { | |
168 for (int j = border_; j < width_end; j++) { | |
169 num_pixels += 1; | |
170 int ssn = i * sizej + j; | |
171 | |
172 uint8_t currPixel = orig_frame_[ssn]; | |
173 uint8_t prevPixel = prev_frame_[ssn]; | |
174 | |
175 tempDiffSum += | |
176 static_cast<uint32_t>(abs((int16_t)(currPixel - prevPixel))); | |
177 pixelSum += static_cast<uint32_t>(currPixel); | |
178 pixelSqSum += static_cast<uint64_t>(currPixel * currPixel); | |
179 } | |
180 } | |
181 | |
182 // Default. | |
183 motion_magnitude_ = 0.0f; | |
184 | |
185 if (tempDiffSum == 0) | |
186 return VPM_OK; | |
187 | |
188 // Normalize over all pixels. | |
189 float const tempDiffAvg = | |
190 static_cast<float>(tempDiffSum) / static_cast<float>(num_pixels); | |
191 float const pixelSumAvg = | |
192 static_cast<float>(pixelSum) / static_cast<float>(num_pixels); | |
193 float const pixelSqSumAvg = | |
194 static_cast<float>(pixelSqSum) / static_cast<float>(num_pixels); | |
195 float contrast = pixelSqSumAvg - (pixelSumAvg * pixelSumAvg); | |
196 | |
197 if (contrast > 0.0) { | |
198 contrast = sqrt(contrast); | |
199 motion_magnitude_ = tempDiffAvg / contrast; | |
200 } | |
201 return VPM_OK; | |
202 } | |
203 | |
204 // Compute spatial metrics: | |
205 // To reduce complexity, we compute the metric for a reduced set of points. | |
206 // The spatial metrics are rough estimates of the prediction error cost for | |
207 // each QM spatial mode: 2x2,1x2,2x1 | |
208 // The metrics are a simple estimate of the up-sampling prediction error, | |
209 // estimated assuming sub-sampling for decimation (no filtering), | |
210 // and up-sampling back up with simple bilinear interpolation. | |
211 int32_t VPMContentAnalysis::ComputeSpatialMetrics_C() { | |
212 const int sizei = height_; | |
213 const int sizej = width_; | |
214 | |
215 // Pixel mean square average: used to normalize the spatial metrics. | |
216 uint32_t pixelMSA = 0; | |
217 | |
218 uint32_t spatialErrSum = 0; | |
219 uint32_t spatialErrVSum = 0; | |
220 uint32_t spatialErrHSum = 0; | |
221 | |
222 // make sure work section is a multiple of 16 | |
223 const int width_end = ((sizej - 2 * border_) & -16) + border_; | |
224 | |
225 for (int i = border_; i < sizei - border_; i += skip_num_) { | |
226 for (int j = border_; j < width_end; j++) { | |
227 int ssn1 = i * sizej + j; | |
228 int ssn2 = (i + 1) * sizej + j; // bottom | |
229 int ssn3 = (i - 1) * sizej + j; // top | |
230 int ssn4 = i * sizej + j + 1; // right | |
231 int ssn5 = i * sizej + j - 1; // left | |
232 | |
233 uint16_t refPixel1 = orig_frame_[ssn1] << 1; | |
234 uint16_t refPixel2 = orig_frame_[ssn1] << 2; | |
235 | |
236 uint8_t bottPixel = orig_frame_[ssn2]; | |
237 uint8_t topPixel = orig_frame_[ssn3]; | |
238 uint8_t rightPixel = orig_frame_[ssn4]; | |
239 uint8_t leftPixel = orig_frame_[ssn5]; | |
240 | |
241 spatialErrSum += static_cast<uint32_t>(abs(static_cast<int16_t>( | |
242 refPixel2 - static_cast<uint16_t>(bottPixel + topPixel + leftPixel + | |
243 rightPixel)))); | |
244 spatialErrVSum += static_cast<uint32_t>(abs(static_cast<int16_t>( | |
245 refPixel1 - static_cast<uint16_t>(bottPixel + topPixel)))); | |
246 spatialErrHSum += static_cast<uint32_t>(abs(static_cast<int16_t>( | |
247 refPixel1 - static_cast<uint16_t>(leftPixel + rightPixel)))); | |
248 pixelMSA += orig_frame_[ssn1]; | |
249 } | |
250 } | |
251 | |
252 // Normalize over all pixels. | |
253 const float spatialErr = static_cast<float>(spatialErrSum >> 2); | |
254 const float spatialErrH = static_cast<float>(spatialErrHSum >> 1); | |
255 const float spatialErrV = static_cast<float>(spatialErrVSum >> 1); | |
256 const float norm = static_cast<float>(pixelMSA); | |
257 | |
258 // 2X2: | |
259 spatial_pred_err_ = spatialErr / norm; | |
260 // 1X2: | |
261 spatial_pred_err_h_ = spatialErrH / norm; | |
262 // 2X1: | |
263 spatial_pred_err_v_ = spatialErrV / norm; | |
264 return VPM_OK; | |
265 } | |
266 | |
267 VideoContentMetrics* VPMContentAnalysis::ContentMetrics() { | |
268 if (ca_Init_ == false) | |
269 return NULL; | |
270 | |
271 content_metrics_->spatial_pred_err = spatial_pred_err_; | |
272 content_metrics_->spatial_pred_err_h = spatial_pred_err_h_; | |
273 content_metrics_->spatial_pred_err_v = spatial_pred_err_v_; | |
274 // Motion metric: normalized temporal difference (MAD). | |
275 content_metrics_->motion_magnitude = motion_magnitude_; | |
276 | |
277 return content_metrics_; | |
278 } | |
279 | |
280 } // namespace webrtc | |
OLD | NEW |