OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "webrtc/modules/desktop_capture/differ_block_sse2.h" | |
12 | |
13 #if defined(_MSC_VER) | |
14 #include <intrin.h> | |
15 #else | |
16 #include <mmintrin.h> | |
17 #include <emmintrin.h> | |
18 #endif | |
19 | |
20 #include "webrtc/modules/desktop_capture/differ_block.h" | |
21 | |
22 namespace webrtc { | |
23 | |
24 extern bool BlockDifference_SSE2_W16(const uint8_t* image1, | |
25 const uint8_t* image2, | |
26 int stride) { | |
27 __m128i acc = _mm_setzero_si128(); | |
28 __m128i v0; | |
29 __m128i v1; | |
30 __m128i sad; | |
31 for (int y = 0; y < kBlockSize; ++y) { | |
32 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); | |
33 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); | |
34 v0 = _mm_loadu_si128(i1); | |
35 v1 = _mm_loadu_si128(i2); | |
36 sad = _mm_sad_epu8(v0, v1); | |
37 acc = _mm_adds_epu16(acc, sad); | |
38 v0 = _mm_loadu_si128(i1 + 1); | |
39 v1 = _mm_loadu_si128(i2 + 1); | |
40 sad = _mm_sad_epu8(v0, v1); | |
41 acc = _mm_adds_epu16(acc, sad); | |
42 v0 = _mm_loadu_si128(i1 + 2); | |
43 v1 = _mm_loadu_si128(i2 + 2); | |
44 sad = _mm_sad_epu8(v0, v1); | |
45 acc = _mm_adds_epu16(acc, sad); | |
46 v0 = _mm_loadu_si128(i1 + 3); | |
47 v1 = _mm_loadu_si128(i2 + 3); | |
48 sad = _mm_sad_epu8(v0, v1); | |
49 acc = _mm_adds_epu16(acc, sad); | |
50 | |
51 // This essential means sad = acc >> 64. We only care about the lower 16 | |
52 // bits. | |
53 sad = _mm_shuffle_epi32(acc, 0xEE); | |
54 sad = _mm_adds_epu16(sad, acc); | |
55 int diff = _mm_cvtsi128_si32(sad); | |
56 if (diff) | |
57 return true; | |
58 image1 += stride; | |
59 image2 += stride; | |
60 } | |
61 return false; | |
62 } | |
63 | |
64 extern bool BlockDifference_SSE2_W32(const uint8_t* image1, | |
65 const uint8_t* image2, | |
66 int stride) { | |
67 __m128i acc = _mm_setzero_si128(); | |
68 __m128i v0; | |
69 __m128i v1; | |
70 __m128i sad; | |
71 for (int y = 0; y < kBlockSize; ++y) { | |
72 const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); | |
73 const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); | |
74 v0 = _mm_loadu_si128(i1); | |
75 v1 = _mm_loadu_si128(i2); | |
76 sad = _mm_sad_epu8(v0, v1); | |
77 acc = _mm_adds_epu16(acc, sad); | |
78 v0 = _mm_loadu_si128(i1 + 1); | |
79 v1 = _mm_loadu_si128(i2 + 1); | |
80 sad = _mm_sad_epu8(v0, v1); | |
81 acc = _mm_adds_epu16(acc, sad); | |
82 v0 = _mm_loadu_si128(i1 + 2); | |
83 v1 = _mm_loadu_si128(i2 + 2); | |
84 sad = _mm_sad_epu8(v0, v1); | |
85 acc = _mm_adds_epu16(acc, sad); | |
86 v0 = _mm_loadu_si128(i1 + 3); | |
87 v1 = _mm_loadu_si128(i2 + 3); | |
88 sad = _mm_sad_epu8(v0, v1); | |
89 acc = _mm_adds_epu16(acc, sad); | |
90 v0 = _mm_loadu_si128(i1 + 4); | |
91 v1 = _mm_loadu_si128(i2 + 4); | |
92 sad = _mm_sad_epu8(v0, v1); | |
93 acc = _mm_adds_epu16(acc, sad); | |
94 v0 = _mm_loadu_si128(i1 + 5); | |
95 v1 = _mm_loadu_si128(i2 + 5); | |
96 sad = _mm_sad_epu8(v0, v1); | |
97 acc = _mm_adds_epu16(acc, sad); | |
98 v0 = _mm_loadu_si128(i1 + 6); | |
99 v1 = _mm_loadu_si128(i2 + 6); | |
100 sad = _mm_sad_epu8(v0, v1); | |
101 acc = _mm_adds_epu16(acc, sad); | |
102 v0 = _mm_loadu_si128(i1 + 7); | |
103 v1 = _mm_loadu_si128(i2 + 7); | |
104 sad = _mm_sad_epu8(v0, v1); | |
105 acc = _mm_adds_epu16(acc, sad); | |
106 | |
107 // This essential means sad = acc >> 64. We only care about the lower 16 | |
108 // bits. | |
109 sad = _mm_shuffle_epi32(acc, 0xEE); | |
110 sad = _mm_adds_epu16(sad, acc); | |
111 int diff = _mm_cvtsi128_si32(sad); | |
112 if (diff) | |
113 return true; | |
114 image1 += stride; | |
115 image2 += stride; | |
116 } | |
117 return false; | |
118 } | |
119 | |
120 } // namespace webrtc | |
OLD | NEW |