| Index: src/core/SkNx.h
|
| diff --git a/src/core/SkNx.h b/src/core/SkNx.h
|
| index 881a475ce0d12e3b30a6678d26e9d7852e1685a4..6bca856d8b11458495b0d66d15f5658c87296455 100644
|
| --- a/src/core/SkNx.h
|
| +++ b/src/core/SkNx.h
|
| @@ -332,6 +332,28 @@ SI void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, cons
|
| Sk4h(r[3], g[3], b[3], a[3]).store(dst64 + 3);
|
| }
|
|
|
| +// Reads 16 consecutive floats (4 Sk4f, 512 bits) from vptr and transposes them into r, g, b, a.
|
| +SI void Sk4f_load4(const void* vptr, Sk4f* r, Sk4f* g, Sk4f* b, Sk4f* a) {
|
| +    const float* src = static_cast<const float*>(vptr);
|
| +    auto px0 = Sk4f::Load(src +  0);  // floats  0..3
|
| +    auto px1 = Sk4f::Load(src +  4);  // floats  4..7
|
| +    auto px2 = Sk4f::Load(src +  8);  // floats  8..11
|
| +    auto px3 = Sk4f::Load(src + 12);  // floats 12..15
|
| +    *r = { px0[0], px1[0], px2[0], px3[0] };  // lane 0 of every group of four
|
| +    *g = { px0[1], px1[1], px2[1], px3[1] };  // lane 1
|
| +    *b = { px0[2], px1[2], px2[2], px3[2] };  // lane 2
|
| +    *a = { px0[3], px1[3], px2[3], px3[3] };  // lane 3
|
| +}
|
| +
|
| +// Transposes r, g, b, a and writes the 16 resulting floats (4 Sk4f, 512 bits) to vdst.
|
| +SI void Sk4f_store4(void* vdst, const Sk4f& r, const Sk4f& g, const Sk4f& b, const Sk4f& a) {
|
| +    float* out = static_cast<float*>(vdst);
|
| +    Sk4f(r[0], g[0], b[0], a[0]).store(out +  0);  // lane 0 of each input -> floats  0..3
|
| +    Sk4f(r[1], g[1], b[1], a[1]).store(out +  4);  // lane 1 of each input -> floats  4..7
|
| +    Sk4f(r[2], g[2], b[2], a[2]).store(out +  8);  // lane 2 of each input -> floats  8..11
|
| +    Sk4f(r[3], g[3], b[3], a[3]).store(out + 12);  // lane 3 of each input -> floats 12..15
|
| +}
|
| +
|
| #endif
|
|
|
| SI void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
|
|
|