Mercurial > vec
changeset 11:13575ba795d3
impl/gcc: add native 256-bit and 512-bit intrinsics
These are simple to implement.
At some point I'd like to refactor vec to use a union and to be able to
detect AVX512 and friends at compile time, so that the processors that
*can* use them have them enabled at runtime. This would mean adding a
vec_init function, which isn't that big of a deal: it can just be run once
at startup, where it will grab the CPU flags we need.
author | Paper <paper@tflc.us> |
---|---|
date | Mon, 18 Nov 2024 16:12:24 -0500 (2 months ago) |
parents | d1d5d767004c |
children | c93928877234 53197dbf4e8e |
files | include/vec/impl/gcc.h |
diffstat | 1 files changed, 223 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/include/vec/impl/gcc.h	Mon Nov 18 15:44:09 2024 -0500
+++ b/include/vec/impl/gcc.h	Mon Nov 18 16:12:24 2024 -0500
@@ -105,12 +105,15 @@
 	VEC_GENERIC_SHIFTS(sign, csign, bits, size) \
 	VEC_GENERIC_AVG(sign, bits, size)
 
+// -----------------------------------------------------------------------------------
+// 128-bit vector types
+
 #ifndef VEC_VUINT8X16
 # define VEC_VUINT8X16
 typedef uint8_t vuint8x16 __attribute__((__vector_size__(16)));
 # define VUINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
 	(vuint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
-# define VUINT8x16_ALIGNMENT 1
+# define VUINT8x16_ALIGNMENT 16
 VEC_DEFINE_OPERATIONS(u, U, 8, 16)
 #endif
 
@@ -119,7 +122,7 @@
 typedef uint16_t vuint16x8 __attribute__((__vector_size__(16)));
 # define VUINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
 	(vuint16x8){ a, b, c, d, e, f, g, h }
-# define VUINT16x8_ALIGNMENT 1
+# define VUINT16x8_ALIGNMENT 16
 VEC_DEFINE_OPERATIONS(u, U, 16, 8)
 #endif
 
@@ -128,7 +131,79 @@
 typedef uint32_t vuint32x4 __attribute__((__vector_size__(16)));
 # define VUINT32x4_CONSTANT(a, b, c, d) \
 	(vuint32x4){ a, b, c, d }
-# define VUINT32x4_ALIGNMENT 1
+# define VUINT32x4_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(u, U, 32, 4)
+#endif
+
+#ifndef VEC_VUINT64X2
+# define VEC_VUINT64X2
+typedef uint64_t vuint64x2 __attribute__((__vector_size__(16)));
+# define VUINT64x2_CONSTANT(a, b) \
+	(vuint64x2){ a, b }
+# define VUINT64x2_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(u, U, 64, 2)
+#endif
+
+#ifndef VEC_VINT8X16
+# define VEC_VINT8X16
+typedef int8_t vint8x16 __attribute__((__vector_size__(16)));
+# define VINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VINT8x16_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(, , 8, 16)
+#endif
+
+#ifndef VEC_VINT16X8
+# define VEC_VINT16X8
+typedef int16_t vint16x8 __attribute__((__vector_size__(16)));
+# define VINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vint16x8){ a, b, c, d, e, f, g, h }
+# define VINT16x8_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(, , 16, 8)
+#endif
+
+#ifndef VEC_VINT32X4
+# define VEC_VINT32X4
+typedef int32_t vint32x4 __attribute__((__vector_size__(16)));
+# define VINT32x4_CONSTANT(a, b, c, d) \
+	(vint32x4){ a, b, c, d }
+# define VINT32x4_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(, , 32, 4)
+#endif
+
+#ifndef VEC_VINT64X2
+# define VEC_VINT64X2
+typedef int64_t vint64x2 __attribute__((__vector_size__(16)));
+# define VINT64x2_CONSTANT(a, b) \
+	(vint64x2){ a, b }
+# define VINT64x2_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(, , 64, 2)
+#endif
+
+#ifndef VEC_VUINT8X16
+# define VEC_VUINT8X16
+typedef uint8_t vuint8x16 __attribute__((__vector_size__(16)));
+# define VUINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vuint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VUINT8x16_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(u, U, 8, 16)
+#endif
+
+#ifndef VEC_VUINT16X8
+# define VEC_VUINT16X8
+typedef uint16_t vuint16x8 __attribute__((__vector_size__(16)));
+# define VUINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vuint16x8){ a, b, c, d, e, f, g, h }
+# define VUINT16x8_ALIGNMENT 16
+VEC_DEFINE_OPERATIONS(u, U, 16, 8)
+#endif
+
+#ifndef VEC_VUINT32X4
+# define VEC_VUINT32X4
+typedef uint32_t vuint32x4 __attribute__((__vector_size__(16)));
+# define VUINT32x4_CONSTANT(a, b, c, d) \
+	(vuint32x4){ a, b, c, d }
+# define VUINT32x4_ALIGNMENT 16
 VEC_DEFINE_OPERATIONS(u, U, 32, 4)
 #endif
 
@@ -137,44 +212,160 @@
 typedef uint64_t vuint64x2 __attribute__((__vector_size__(16)));
 # define VUINT64x2_CONSTANT(a, b) \
 	(vuint64x2){ a, b }
-# define VUINT64x2_ALIGNMENT 1
+# define VUINT64x2_ALIGNMENT 16
 VEC_DEFINE_OPERATIONS(u, U, 64, 2)
 #endif
 
-#ifndef VEC_VINT8X16
-# define VEC_VINT8X16
-typedef int8_t vint8x16 __attribute__((__vector_size__(16)));
-# define VINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
-	(vint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
-# define VINT8x16_ALIGNMENT 1
-VEC_DEFINE_OPERATIONS(, , 8, 16)
+// --------------------------------------------------------------------------
+// 256-bit vector types
+
+#ifndef VEC_VUINT8X32
+# define VEC_VUINT8X32
+typedef uint8_t vuint8x32 __attribute__((__vector_size__(32)));
+# define VUINT8x32_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af) \
+	((vuint8x32){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af })
+# define VUINT8x32_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(u, U, 8, 32)
+#endif
+
+#ifndef VEC_VUINT16X16
+# define VEC_VUINT16X16
+typedef uint16_t vuint16x16 __attribute__((__vector_size__(32)));
+# define VUINT16x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vuint16x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VUINT16x16_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(u, U, 16, 16)
+#endif
+
+#ifndef VEC_VUINT32X8
+# define VEC_VUINT32X8
+typedef uint32_t vuint32x8 __attribute__((__vector_size__(32)));
+# define VUINT32x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vuint32x8){ a, b, c, d, e, f, g, h }
+# define VUINT32x8_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(u, U, 32, 8)
 #endif
 
-#ifndef VEC_VINT16X8
-# define VEC_VINT16X8
-typedef int16_t vint16x8 __attribute__((__vector_size__(16)));
-# define VINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
-	(vint16x8){ a, b, c, d, e, f, g, h }
-# define VINT16x8_ALIGNMENT 1
-VEC_DEFINE_OPERATIONS(, , 16, 8)
+#ifndef VEC_VUINT64X4
+# define VEC_VUINT64X4
+typedef uint64_t vuint64x4 __attribute__((__vector_size__(32)));
+# define VUINT64x4_CONSTANT(a, b, c, d) \
+	(vuint64x4){ a, b, c, d }
+# define VUINT64x4_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(u, U, 64, 4)
+#endif
+
+#ifndef VEC_VINT8X32
+# define VEC_VINT8X32
+typedef int8_t vint8x32 __attribute__((__vector_size__(32)));
+# define VINT8x32_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af) \
+	((vint8x32){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af })
+# define VINT8x32_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(, , 8, 32)
+#endif
+
+#ifndef VEC_VINT16X16
+# define VEC_VINT16X16
+typedef int16_t vint16x16 __attribute__((__vector_size__(32)));
+# define VINT16x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vint16x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VINT16x16_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(, , 16, 16)
+#endif
+
+#ifndef VEC_VINT32X8
+# define VEC_VINT32X8
+typedef int32_t vint32x8 __attribute__((__vector_size__(32)));
+# define VINT32x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vint32x8){ a, b, c, d, e, f, g, h }
+# define VINT32x8_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(, , 32, 8)
+#endif
+
+#ifndef VEC_VINT64X4
+# define VEC_VINT64X4
+typedef int64_t vint64x4 __attribute__((__vector_size__(32)));
+# define VINT64x4_CONSTANT(a, b, c, d) \
+	(vint64x4){ a, b, c, d }
+# define VINT64x4_ALIGNMENT 32
+VEC_DEFINE_OPERATIONS(, , 64, 4)
 #endif
 
-#ifndef VEC_VINT32X4
-# define VEC_VINT32X4
-typedef int32_t vint32x4 __attribute__((__vector_size__(16)));
-# define VINT32x4_CONSTANT(a, b, c, d) \
-	(vint32x4){ a, b, c, d }
-# define VINT32x4_ALIGNMENT 1
-VEC_DEFINE_OPERATIONS(, , 32, 4)
+// --------------------------------------------------------------------------
+// 512-bit vector types
+
+#ifndef VEC_VUINT8X64
+# define VEC_VUINT8X64
+typedef uint8_t vuint8x64 __attribute__((__vector_size__(64)));
+# define VUINT8x64_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl) \
+	((vuint8x64){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl })
+# define VUINT8x64_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(u, U, 8, 64)
+#endif
+
+#ifndef VEC_VUINT16X32
+# define VEC_VUINT16X32
+typedef uint16_t vuint16x32 __attribute__((__vector_size__(64)));
+# define VUINT16x32_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af) \
+	((vuint16x32){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af })
+# define VUINT16x32_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(u, U, 16, 32)
+#endif
+
+#ifndef VEC_VUINT32X16
+# define VEC_VUINT32X16
+typedef uint32_t vuint32x16 __attribute__((__vector_size__(64)));
+# define VUINT32x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vuint32x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VUINT32x16_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(u, U, 32, 16)
+#endif
+
+#ifndef VEC_VUINT64X8
+# define VEC_VUINT64X8
+typedef uint64_t vuint64x8 __attribute__((__vector_size__(64)));
+# define VUINT64x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vuint64x8){ a, b, c, d, e, f, g, h }
+# define VUINT64x8_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(u, U, 64, 8)
 #endif
 
-#ifndef VEC_VINT64X2
-# define VEC_VINT64X2
-typedef int64_t vint64x2 __attribute__((__vector_size__(16)));
-# define VINT64x2_CONSTANT(a, b) \
-	(vint64x2){ a, b }
-# define VINT64x2_ALIGNMENT 1
-VEC_DEFINE_OPERATIONS(, , 64, 2)
+#ifndef VEC_VINT8X64
+# define VEC_VINT8X64
+typedef int8_t vint8x64 __attribute__((__vector_size__(64)));
+# define VINT8x64_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl) \
+	((vint8x64){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af, ag, ah, ai, aj, ak, al, am, an, ao, ap, aq, ar, as, at, au, av, aw, ax, ay, az, ba, bb, bc, bd, be, bf, bg, bh, bi, bj, bk, bl })
+# define VINT8x64_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(, , 8, 64)
+#endif
+
+#ifndef VEC_VINT16X32
+# define VEC_VINT16X32
+typedef int16_t vint16x32 __attribute__((__vector_size__(64)));
+# define VINT16x32_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af) \
+	((vint16x32){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, aa, ab, ac, ad, ae, af })
+# define VINT16x32_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(, , 16, 32)
 #endif
 
+#ifndef VEC_VINT32X16
+# define VEC_VINT32X16
+typedef int32_t vint32x16 __attribute__((__vector_size__(64)));
+# define VINT32x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
+	(vint32x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
+# define VINT32x16_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(, , 32, 16)
+#endif
+
+#ifndef VEC_VINT64X8
+# define VEC_VINT64X8
+typedef int64_t vint64x8 __attribute__((__vector_size__(64)));
+# define VINT64x8_CONSTANT(a, b, c, d, e, f, g, h) \
+	(vint64x8){ a, b, c, d, e, f, g, h }
+# define VINT64x8_ALIGNMENT 64
+VEC_DEFINE_OPERATIONS(, , 64, 8)
+#endif
+
+// ----------------------------------------------------------
+
 #undef VEC_DEFINE_OPERATIONS