# HG changeset patch # User Paper # Date 1729654035 14400 # Node ID 3c5545b1568f367e9bb585b019ed196121347b86 # Parent f12b5dd4e18cc6f80220f3a3be074b6aa50b445e *: much better alignment support & tests diff -r f12b5dd4e18c -r 3c5545b1568f include/vec/impl/altivec.h --- a/include/vec/impl/altivec.h Tue Oct 22 22:39:05 2024 -0400 +++ b/include/vec/impl/altivec.h Tue Oct 22 23:27:15 2024 -0400 @@ -109,7 +109,7 @@ # define VUINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ (vuint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } VEC_DEFINE_OPERATIONS(u, 8, 16) -# define VUINT8x16_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VUINT8x16_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VUINT8X16 */ #ifndef VEC_VINT8X16 @@ -118,7 +118,7 @@ # define VINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ (vint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } VEC_DEFINE_OPERATIONS(, 8, 16) -# define VINT8x16_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VINT8x16_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VINT8X16 */ #ifndef VEC_VUINT16X8 @@ -127,7 +127,7 @@ # define VUINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \ (vuint16x8){ a, b, c, d, e, f, g, h } VEC_DEFINE_OPERATIONS(u, 16, 8) -# define VUINT16x8_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VUINT16x8_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VUINT16X8 */ #ifndef VEC_VINT16X8 @@ -136,7 +136,7 @@ # define VINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \ (vint16x8){ a, b, c, d, e, f, g, h } VEC_DEFINE_OPERATIONS(, 16, 8) -# define VINT16x8_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VINT16x8_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VINT16X8 */ #ifndef VEC_VUINT32X4 @@ -145,7 +145,7 @@ # define VUINT32x4_CONSTANT(a, b, c, d) \ (vuint32x4){ a, b, c, d } VEC_DEFINE_OPERATIONS(u, 32, 4) -# define VUINT32x4_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VUINT32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VUINT32X4 */ #ifndef 
VEC_VINT32X4 @@ -154,7 +154,7 @@ # define VINT32x4_CONSTANT(a, b, c, d) \ (vint32x4){ a, b, c, d } VEC_DEFINE_OPERATIONS(, 32, 4) -# define VINT32x4_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VINT32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT #endif /* VEC_VINT32X4 */ #if defined(__POWER8__) && defined(__VSX__) @@ -165,7 +165,7 @@ # define VUINT64x2_CONSTANT(a, b) \ (vuint64x2){ a, b } VEC_DEFINE_OPERATIONS(u, 64, 2) -# define VUINT64x2_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VUINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT # endif /* VEC_VUINT64X2 */ # ifndef VEC_VINT64X2 @@ -174,7 +174,7 @@ # define VINT64x2_CONSTANT(a, b) \ (vint64x2){ a, b } VEC_DEFINE_OPERATIONS(, 64, 2) -# define VINT64x2_ALIGNED(x) ((x) % VEC_ALTIVEC_ALIGNMENT == 0) +# define VINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT # endif /* VEC_VINT64X2 */ #endif /* defined(__POWER8__) && defined(__VSX__) */ diff -r f12b5dd4e18c -r 3c5545b1568f include/vec/impl/gcc.h --- a/include/vec/impl/gcc.h Tue Oct 22 22:39:05 2024 -0400 +++ b/include/vec/impl/gcc.h Tue Oct 22 23:27:15 2024 -0400 @@ -28,16 +28,26 @@ #include #define VEC_DEFINE_OPERATIONS(sign, bits, size) \ - VEC_DECL_LOAD(sign, bits, size) \ + VEC_DECL_LOAD_ALIGNED(sign, bits, size) \ { \ v##sign##int##bits##x##size vec; \ memcpy(&vec, in, sizeof(vec)); \ return vec; \ } \ \ + VEC_DECL_LOAD(sign, bits, size) \ + { \ + return v##sign##int##bits##x##size##_load_aligned(in); \ + } \ + \ + VEC_DECL_STORE_ALIGNED(sign, bits, size) \ + { \ + memcpy(out, &vec, sizeof(vec)); \ + } \ + \ VEC_DECL_STORE(sign, bits, size) \ { \ - memcpy(out, &vec, sizeof(vec)); \ + return v##sign##int##bits##x##size##_store_aligned(vec, out); \ } \ \ VEC_DECL_ADD(sign, bits, size) \ diff -r f12b5dd4e18c -r 3c5545b1568f include/vec/impl/generic.h --- a/include/vec/impl/generic.h Tue Oct 22 22:39:05 2024 -0400 +++ b/include/vec/impl/generic.h Tue Oct 22 23:27:15 2024 -0400 @@ -33,16 +33,26 @@ } v##sign##int##bits##x##size; #define VEC_DEFINE_OPERATIONS(sign, bits, 
size) \ - VEC_DECL_LOAD(sign, bits, size) \ + VEC_DECL_LOAD_ALIGNED(sign, bits, size) \ { \ v##sign##int##bits##x##size vec; \ memcpy(vec.arr, in, sizeof(vec.arr)); \ return vec; \ } \ \ + VEC_DECL_LOAD(sign, bits, size) \ + { \ + return v##sign##int##bits##x##size##_load_aligned(in); \ + } \ + \ + VEC_DECL_STORE_ALIGNED(sign, bits, size) \ + { \ + memcpy(out, vec.arr, sizeof(vec.arr)); \ + } \ + \ VEC_DECL_STORE(sign, bits, size) \ { \ - memcpy(out, vec.arr, sizeof(vec.arr)); \ + return v##sign##int##bits##x##size##_store_aligned(vec, out); \ } \ \ VEC_DECL_ADD(sign, bits, size) \ @@ -93,7 +103,6 @@ ((vuint8x16){ .arr = { a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } }) VEC_DEFINE_OPERATIONS(u, 8, 16) VEC_GENERIC_COMPARISONS(u, 8, 16) -# define VINT8x16_ALIGNED 1 #endif #ifndef VEC_VUINT16X8 @@ -103,7 +112,6 @@ ((vuint16x8){ .arr = { a, b, c, d, e, f, g, h } }) VEC_DEFINE_OPERATIONS(u, 16, 8) VEC_GENERIC_COMPARISONS(u, 16, 8) -# define VINT16x8_ALIGNED 1 #endif #ifndef VEC_VUINT32X4 @@ -113,7 +121,6 @@ ((vuint32x4){ .arr = { a, b, c, d } }) VEC_DEFINE_OPERATIONS(u, 32, 4) VEC_GENERIC_COMPARISONS(u, 32, 4) -# define VINT32x4_ALIGNED 1 #endif #ifndef VEC_VUINT64X2 @@ -123,7 +130,6 @@ ((vuint64x2){ .arr = { a, b } }) VEC_DEFINE_OPERATIONS(u, 64, 2) VEC_GENERIC_COMPARISONS(u, 64, 2) -# define VINT64x2_ALIGNED 1 #endif #ifndef VEC_VINT8X16 @@ -133,7 +139,6 @@ ((vint8x16){ .arr = { a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } }) VEC_DEFINE_OPERATIONS(, 8, 16) VEC_GENERIC_COMPARISONS(, 8, 16) -# define VINT8x16_ALIGNED 1 #endif #ifndef VEC_VINT16X8 @@ -143,7 +148,6 @@ ((vint16x8){ .arr = { a, b, c, d, e, f, g, h } }) VEC_DEFINE_OPERATIONS(, 16, 8) VEC_GENERIC_COMPARISONS(, 16, 8) -# define VINT16x8_ALIGNED 1 #endif #ifndef VEC_VINT32X4 @@ -153,7 +157,6 @@ ((vint32x4){ .arr = { a, b, c, d } }) VEC_DEFINE_OPERATIONS(, 32, 4) VEC_GENERIC_COMPARISONS(, 32, 4) -# define VINT32x4_ALIGNED 1 #endif #ifndef VEC_VINT64X2 @@ -163,7 +166,6 @@ ((vint64x2){ .arr = { a, b } }) 
VEC_DEFINE_OPERATIONS(, 64, 2) VEC_GENERIC_COMPARISONS(, 64, 2) -# define VINT64x2_ALIGNED 1 #endif #undef VEC_DEFINE_STRUCT diff -r f12b5dd4e18c -r 3c5545b1568f include/vec/impl/sse2.h --- a/include/vec/impl/sse2.h Tue Oct 22 22:39:05 2024 -0400 +++ b/include/vec/impl/sse2.h Tue Oct 22 23:27:15 2024 -0400 @@ -26,12 +26,24 @@ #include /* memcpy */ +#define VEC_SSE2_ALIGNMENT 16 + #define VEC_DEFINE_OPERATIONS(sign, bits, size) \ + VEC_DECL_LOAD_ALIGNED(sign, bits, size) \ + { \ + return _mm_load_si128((const __m128i *)in); \ + } \ + \ VEC_DECL_LOAD(sign, bits, size) \ { \ return _mm_loadu_si128((const __m128i *)in); \ } \ \ + VEC_DECL_STORE_ALIGNED(sign, bits, size) \ + { \ + _mm_store_si128((__m128i *)out, vec); \ + } \ + \ VEC_DECL_STORE(sign, bits, size) \ { \ _mm_storeu_si128((__m128i *)out, vec); \ @@ -90,7 +102,7 @@ (_mm_setr_epi8(p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)) VEC_DEFINE_OPERATIONS(u, 8, 16) VEC_GENERIC_COMPARISONS(u, 8, 16) -# define VINT8x16_ALIGNED 1 +# define VUINT8x16_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(u, 8, 16) { // unpack and multiply @@ -109,7 +121,7 @@ (_mm_setr_epi16(h, g, f, e, d, c, b, a)) VEC_DEFINE_OPERATIONS(u, 16, 8) VEC_GENERIC_COMPARISONS(u, 16, 8) -# define VINT16x8_ALIGNED 1 +# define VUINT16x8_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(u, 16, 8) { return _mm_mullo_epi16(vec1, vec2); @@ -123,7 +135,7 @@ (_mm_setr_epi32(d, c, b, a)) VEC_DEFINE_OPERATIONS(u, 32, 4) VEC_GENERIC_COMPARISONS(u, 32, 4) -# define VUINT32x4_ALIGNED 1 +# define VUINT32x4_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(u, 32, 4) { /* this was stolen from... 
somewhere :) */ @@ -146,7 +158,7 @@ } VEC_DEFINE_OPERATIONS(u, 64, 2) VEC_GENERIC_COMPARISONS(u, 64, 2) -# define VUINT64x2_ALIGNED 1 +# define VUINT64x2_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(u, 64, 2) { /* ac = (vec1 & 0xFFFFFFFF) * (vec2 & 0xFFFFFFFF); */ @@ -182,7 +194,7 @@ (_mm_setr_epi8(p, o, n, m, l, k, j, i, h, g, f, e, d, c, b, a)) VEC_DEFINE_OPERATIONS(, 8, 16) VEC_DEFINE_COMPARISONS_SIGNED(8, 16) -# define VINT8x16_ALIGNED 1 +# define VINT8x16_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(, 8, 16) { // unpack and multiply @@ -201,7 +213,7 @@ (_mm_setr_epi16(h, g, f, e, d, c, b, a)) VEC_DEFINE_OPERATIONS(, 16, 8) VEC_DEFINE_COMPARISONS_SIGNED(16, 8) -# define VINT16x8_ALIGNED 1 +# define VINT16x8_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(, 16, 8) { return _mm_mullo_epi16(vec1, vec2); @@ -215,7 +227,7 @@ (_mm_setr_epi32(d, c, b, a)) VEC_DEFINE_OPERATIONS(, 32, 4) VEC_DEFINE_COMPARISONS_SIGNED(32, 4) -# define VINT32x4_ALIGNED 1 +# define VINT32x4_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(, 32, 4) { __m128i a13 = _mm_shuffle_epi32(vec1, 0xF5); // (-,a3,-,a1) @@ -237,7 +249,7 @@ } VEC_DEFINE_OPERATIONS(, 64, 2) VEC_GENERIC_COMPARISONS(, 64, 2) -# define VINT64x2_ALIGNED 1 +# define VINT64x2_ALIGNMENT VEC_SSE2_ALIGNMENT VEC_DECL_MUL(, 64, 2) { /* ac = (vec1 & 0xFFFFFFFF) * (vec2 & 0xFFFFFFFF); */ diff -r f12b5dd4e18c -r 3c5545b1568f include/vec/vec.h --- a/include/vec/vec.h Tue Oct 22 22:39:05 2024 -0400 +++ b/include/vec/vec.h Tue Oct 22 23:27:15 2024 -0400 @@ -56,13 +56,13 @@ #endif #ifndef VEC_ALIGNED -# if VEC_GCC_ATLEAST(2, 7, 0) +# if VEC_GNUC_ATLEAST(2, 7, 0) # define VEC_ALIGNED(x) __attribute__((aligned(x))) # endif #endif #ifndef VEC_ALWAYS_INLINE -# if VEC_GCC_ATLEAST(3, 1, 0) +# if VEC_GNUC_ATLEAST(3, 1, 0) # define VEC_ALWAYS_INLINE(x) __attribute__((always_inline)) # endif #endif @@ -72,15 +72,22 @@ #endif #ifdef VEC_ALIGNED -# define VEC_ALIGNED_ARRAY(type, var, size, align) \ - VEC_ALIGNED(align) type var[size] +# define VEC_ALIGNED_ARRAY(type, 
var, length, align) \ + VEC_ALIGNED(align) type var[length] +# define VEC_ALIGNED_ARRAY_SIZEOF(var, align) \ + (sizeof(var)) #else /* allocate more than necessary to align */ -# define VEC_ALIGNED_ARRAY(type, var, size, align) \ - type var##_unaligned_[size + align - 1]; \ - type *var = (type *)((((intptr_t)var##_unaligned_ + align - 1) / align) * align) +# define VEC_ALIGNED_ARRAY(type, var, length, align) \ + unsigned char vec_##var##_unaligned_[((length) * sizeof(type)) + (align) - 1]; \ + type *var = (type *)((((intptr_t)vec_##var##_unaligned_ + (align) - 1) / (align)) * (align)) +# define VEC_ALIGNED_ARRAY_SIZEOF(var, align) \ + (sizeof(vec_##var##_unaligned_) - ((align) - 1)) #endif +#define VEC_ALIGNED_ARRAY_LENGTH(var, align) \ + (VEC_ALIGNED_ARRAY_SIZEOF(var, align)/sizeof(*var)) + /* --------------------------------------------------------------- */ /* bit shift */ @@ -178,7 +185,9 @@ #define VEC_DECL_SPLAT(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, splat, (sign##int##bits##_t x)) #define VEC_DECL_LOAD(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, load, (const sign##int##bits##_t in[size])) +#define VEC_DECL_LOAD_ALIGNED(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, load_aligned, (const sign##int##bits##_t in[size])) #define VEC_DECL_STORE(sign, bits, size) VEC_OPERATION_DECL(sign, bits, size, void, store, (v##sign##int##bits##x##size vec, sign##int##bits##_t out[size])) +#define VEC_DECL_STORE_ALIGNED(sign, bits, size) VEC_OPERATION_DECL(sign, bits, size, void, store_aligned, (v##sign##int##bits##x##size vec, sign##int##bits##_t out[size])) #define VEC_DECL_ADD(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, add) #define VEC_DECL_SUB(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, sub) #define VEC_DECL_MUL(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, mul) @@ -274,18 +283,21 @@ VEC_GENERIC_COMPARISON(sign, bits, size, EQ, ==) \ VEC_GENERIC_THAN_OR_EQUAL(sign, bits, size) +#ifndef VEC_SUPPRESS_HW 
/* POWER altivec */ -#ifdef __ALTIVEC__ -# include "impl/altivec.h" +# ifdef __ALTIVEC__ +# include "impl/altivec.h" +# endif +/* x86 SSE2 */ +# ifdef __SSE2__ +# include "impl/sse2.h" +# endif #endif -/* x86 SSE2 */ -#ifdef __SSE2__ -# include "impl/sse2.h" -#endif - -#ifdef VEC_HAVE_GNUC_VECTORS -# include "impl/gcc.h" +#ifndef VEC_SUPPRESS_GCC +# ifdef VEC_HAVE_GNUC_VECTORS +# include "impl/gcc.h" +# endif #endif #include "impl/generic.h" @@ -346,6 +358,90 @@ #undef VEC_GENERIC_COMPARISON #undef VEC_GENERIC_COMPARISONS +#undef VEC_VINT8X16 +#undef VEC_VINT16X8 +#undef VEC_VINT32X4 +#undef VEC_VINT64X2 +#undef VEC_VUINT8X16 +#undef VEC_VUINT16X8 +#undef VEC_VUINT32X4 +#undef VEC_VUINT64X2 + /* ---------------------------------------------------------------- */ +/* user-friendly alignment crap */ + +#ifndef VINT8x16_ALIGNMENT +# define VINT8x16_ALIGNMENT 1 +#endif + +#ifndef VINT16x8_ALIGNMENT +# define VINT16x8_ALIGNMENT 1 +#endif + +#ifndef VINT32x4_ALIGNMENT +# define VINT32x4_ALIGNMENT 1 +#endif + +#ifndef VINT64x2_ALIGNMENT +# define VINT64x2_ALIGNMENT 1 +#endif + +#ifndef VUINT8x16_ALIGNMENT +# define VUINT8x16_ALIGNMENT 1 +#endif + +#ifndef VUINT16x8_ALIGNMENT +# define VUINT16x8_ALIGNMENT 1 +#endif + +#ifndef VUINT32x4_ALIGNMENT +# define VUINT32x4_ALIGNMENT 1 +#endif + +#ifndef VUINT64x2_ALIGNMENT +# define VUINT64x2_ALIGNMENT 1 +#endif + +/* pointer alignment macros */ + +#define VINT8x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(int8_t, var, 16, VINT8x16_ALIGNMENT) +#define VINT8x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x16_ALIGNMENT) +#define VINT8x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x16_ALIGNMENT) +#define VINT8x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x16_ALIGNMENT == 0) + +#define VINT16x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(int16_t, var, 8, VINT16x8_ALIGNMENT) +#define VINT16x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x8_ALIGNMENT) +#define VINT16x8_ALIGNED_ARRAY_LENGTH(var) 
VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x8_ALIGNMENT) +#define VINT16x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x8_ALIGNMENT == 0) + +#define VINT32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(int32_t, var, 4, VINT32x4_ALIGNMENT) +#define VINT32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT32x4_ALIGNMENT) +#define VINT32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT32x4_ALIGNMENT) +#define VINT32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT32x4_ALIGNMENT == 0) + +#define VINT64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(int64_t, var, 2, VINT64x2_ALIGNMENT) +#define VINT64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT64x2_ALIGNMENT) +#define VINT64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT64x2_ALIGNMENT) +#define VINT64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT64x2_ALIGNMENT == 0) + +#define VUINT8x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(uint8_t, var, 16, VUINT8x16_ALIGNMENT) +#define VUINT8x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x16_ALIGNMENT) +#define VUINT8x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x16_ALIGNMENT) +#define VUINT8x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x16_ALIGNMENT == 0) + +#define VUINT16x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(uint16_t, var, 8, VUINT16x8_ALIGNMENT) +#define VUINT16x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x8_ALIGNMENT) +#define VUINT16x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x8_ALIGNMENT) +#define VUINT16x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x8_ALIGNMENT == 0) + +#define VUINT32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(uint32_t, var, 4, VUINT32x4_ALIGNMENT) +#define VUINT32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT32x4_ALIGNMENT) +#define VUINT32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT32x4_ALIGNMENT) +#define VUINT32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT32x4_ALIGNMENT == 0) + +#define
VUINT64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(uint64_t, var, 2, VUINT64x2_ALIGNMENT) +#define VUINT64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT64x2_ALIGNMENT) +#define VUINT64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT64x2_ALIGNMENT) +#define VUINT64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x2_ALIGNMENT == 0) #endif /* VEC_VEC_H_ */ diff -r f12b5dd4e18c -r 3c5545b1568f test/Makefile --- a/test/Makefile Tue Oct 22 22:39:05 2024 -0400 +++ b/test/Makefile Tue Oct 22 23:27:15 2024 -0400 @@ -1,12 +1,30 @@ -_CFLAGS = -g -O2 -I../include $(CFLAGS) +CFLAGS += -std=c99 -I../include -_LDFLAGS = $(LDFLAGS) +# binary files +BINS = test-gcc test-generic test-host +OBJS = $(BINS:=.o) + +.PHONY: all clean test + +all: $(BINS) -.c.o: - $(CC) -c $(_CFLAGS) $< -o $@ +# suppress the platform-dependent hardware stuff so we only have +# GCC vector extensions +test-gcc: CFLAGS += -DVEC_SUPPRESS_HW -main: main.o - $(CC) $(_CFLAGS) -o $@ $^ $(_LDFLAGS) +# also suppress GCC extensions, leaving only the defaults +test-generic: CFLAGS += -DVEC_SUPPRESS_HW -DVEC_SUPPRESS_GCC + +$(OBJS): main.c + $(CC) $(CFLAGS) -o $@ -c $^ + +$(BINS): %: %.o + $(CC) $(LDFLAGS) -o $@ $^ clean: - $(RM) main main.o \ No newline at end of file + $(RM) $(BINS) $(OBJS) + +test: clean $(BINS) + ./test-gcc + ./test-generic + ./test-host diff -r f12b5dd4e18c -r 3c5545b1568f test/main.c --- a/test/main.c Tue Oct 22 22:39:05 2024 -0400 +++ b/test/main.c Tue Oct 22 23:27:15 2024 -0400 @@ -88,6 +88,7 @@ DEFINE_PRINT_VECTOR_2(32, 4) DEFINE_PRINT_VECTOR_2(64, 2) +#include "test_align.h" #include "test_arith.h" #include "test_compare.h" @@ -95,6 +96,7 @@ { int ret = 0; + ret |= test_align(); ret |= test_arith(); ret |= test_compare(); diff -r f12b5dd4e18c -r 3c5545b1568f test/test_align.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/test_align.h Tue Oct 22 23:27:15 2024 -0400 @@ -0,0 +1,40 @@ +static int test_align(void) +{ + int ret = 0; + +#define RUN_TEST(sign, 
csign, bits, size) \ + do { \ + /* allocate the aligned array */ \ + V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr); \ + \ + /* fill the values */ \ + for (int i = 0; i < V##csign##INT##bits##x##size##_ALIGNED_ARRAY_LENGTH(vec_arr); i++) \ + vec_arr[i] = i; \ + \ + /* try to load it */ \ + v##sign##int##bits##x##size vec = v##sign##int##bits##x##size##_load_aligned(vec_arr); \ + \ + /* now allocate an output array */ \ + V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \ + \ + /* try storing it */ \ + v##sign##int##bits##x##size##_store_aligned(vec, vec_arr_out); \ + \ + /* mark success or failure; memcmp takes a byte count, not an element count */ \ + ret |= !!memcmp(vec_arr, vec_arr_out, V##csign##INT##bits##x##size##_ALIGNED_ARRAY_SIZEOF(vec_arr)); \ + } while (0); + +#define RUN_TESTS(bits, size) \ + RUN_TEST( , , bits, size) \ + RUN_TEST(u, U, bits, size) + + RUN_TESTS(8, 16) + RUN_TESTS(16, 8) + RUN_TESTS(32, 4) + RUN_TESTS(64, 2) + +#undef RUN_TESTS +#undef RUN_TEST + + return ret; +} diff -r f12b5dd4e18c -r 3c5545b1568f test/test_arith.h --- a/test/test_arith.h Tue Oct 22 22:39:05 2024 -0400 +++ b/test/test_arith.h Tue Oct 22 23:27:15 2024 -0400 @@ -117,4 +117,4 @@ #undef RUN_TESTS #undef RUN_TEST return ret; -} \ No newline at end of file +}