Mercurial > vec
view include/vec/vec.h @ 2:f12b5dd4e18c
*: many new operations and a real test suite
author | Paper <paper@tflc.us> |
---|---|
date | Tue, 22 Oct 2024 22:39:05 -0400 |
parents | 02a517e4c492 |
children | 3c5545b1568f |
line wrap: on
line source
/** * vec - a tiny SIMD vector library in C99 * * Copyright (c) 2024 Paper * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. **/ #ifndef VEC_VEC_H_ #define VEC_VEC_H_ #include <stdint.h> #include <limits.h> #define VEC_SEMVER_ATLEAST(a, b, c, x, y, z) \ (((a) >= (x)) && \ ((a) > x || (b) >= (y)) && \ ((a) > x || (b) > (y) || (c) >= (z))) #define VEC_GNUC_ATLEAST(x, y, z) \ VEC_SEMVER_ATLEAST(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, x, y, z) /* GCC/clang attributes */ #if defined(__has_attribute) # if __has_attribute(__always_inline__) # define VEC_ALWAYS_INLINE __attribute__((__always_inline__)) # endif # if __has_attribute(__aligned__) # define VEC_ALIGNED(x) __attribute__((__aligned__(x))) # endif # if __has_attribute(__vector_size__) # define VEC_HAVE_GNUC_VECTORS # endif #endif #ifndef VEC_HAVE_GNUC_VECTORS # if VEC_GNUC_ATLEAST(4, 0, 0) # define VEC_HAVE_GNUC_VECTORS # endif #endif #ifndef VEC_ALIGNED # if VEC_GCC_ATLEAST(2, 7, 0) # define VEC_ALIGNED(x) __attribute__((aligned(x))) # endif #endif #ifndef VEC_ALWAYS_INLINE # if VEC_GCC_ATLEAST(3, 1, 0) # define VEC_ALWAYS_INLINE(x) __attribute__((always_inline)) # endif #endif #ifndef VEC_ALWAYS_INLINE # define VEC_ALWAYS_INLINE #endif #ifdef VEC_ALIGNED # define VEC_ALIGNED_ARRAY(type, var, size, align) \ VEC_ALIGNED(align) type var[size] #else /* allocate more than necessary to align */ # define VEC_ALIGNED_ARRAY(type, var, size, align) \ type var##_unaligned_[size + align - 1]; \ type *var = (type *)((((intptr_t)var##_unaligned_ + align - 1) / align) * align) #endif /* --------------------------------------------------------------- */ /* bit shift */ static inline VEC_ALWAYS_INLINE uintmax_t vec_ulrshift(uintmax_t x, unsigned int y) { return x >> y; } static inline VEC_ALWAYS_INLINE uintmax_t vec_ullshift(uintmax_t x, unsigned int y) { return x << y; } static inline VEC_ALWAYS_INLINE intmax_t vec_lrshift(intmax_t x, unsigned int y) { return (intmax_t)(((uintmax_t)x) >> y); } static inline VEC_ALWAYS_INLINE intmax_t vec_llshift(intmax_t x, unsigned int y) { return (intmax_t)(((uintmax_t)x) << y); } static inline VEC_ALWAYS_INLINE uintmax_t vec_urshift(uintmax_t x, unsigned int y) { return x >> y; } static inline VEC_ALWAYS_INLINE uintmax_t vec_ulshift(uintmax_t x, unsigned int y) { return x << y; } /** * Arithmetic shifts; based off code from OpenMPT, which is under * the Boost Software License: * * Permission is hereby granted, free of charge, to any person or organization * obtaining a copy of the software and accompanying documentation covered by * this license (the "Software") to use, reproduce, display, distribute, * execute, and transmit the Software, and to prepare derivative works of the * Software, and to permit third-parties to whom the Software is furnished to * do so, all subject to the following: * * The copyright notices in the Software and this entire statement, including * the above license grant, this restriction and the following disclaimer, * must be included in all copies of the Software, in whole or in part, and * all derivative works of the Software, unless such copies or derivative * works are solely in the form of machine-executable object code generated by * a source language processor. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. **/ static inline VEC_ALWAYS_INLINE intmax_t vec_rshift(intmax_t x, unsigned int y) { static const uintmax_t roffset = UINTMAX_C(1) << ((sizeof(intmax_t) * CHAR_BIT) - 1); uintmax_t urx = (uintmax_t)x; urx += roffset; urx >>= y; urx -= roffset >> y; return (intmax_t)urx; } static inline VEC_ALWAYS_INLINE intmax_t vec_lshift(intmax_t x, unsigned int y) { static const uintmax_t roffset = UINTMAX_C(1) << ((sizeof(intmax_t) * CHAR_BIT) - 1); uintmax_t urx = (uintmax_t)x; urx += roffset; urx <<= y; urx -= roffset << y; return (intmax_t)urx; } /* --------------------------------------------------------------- */ /* Implementation includes */ #define VEC_OPERATION_DECL(sign, bits, size, ret, op, params) \ static inline VEC_ALWAYS_INLINE ret v##sign##int##bits##x##size##_##op params #define VEC_OPERATION_THIS_DECL(sign, bits, size, op, params) \ VEC_OPERATION_DECL(sign, bits, size, v##sign##int##bits##x##size, op, params) #define VEC_TWOWAY_DECL(sign, bits, size, op) \ VEC_OPERATION_THIS_DECL(sign, bits, size, op, (v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2)) #define VEC_DECL_SPLAT(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, splat, (sign##int##bits##_t x)) #define VEC_DECL_LOAD(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, load, (const sign##int##bits##_t in[size])) #define VEC_DECL_STORE(sign, bits, size) VEC_OPERATION_DECL(sign, bits, size, void, store, (v##sign##int##bits##x##size vec, sign##int##bits##_t out[size])) #define VEC_DECL_ADD(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, add) #define VEC_DECL_SUB(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, sub) #define VEC_DECL_MUL(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, mul) #define VEC_DECL_DIV(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, div) #define VEC_DECL_AND(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, and) #define VEC_DECL_OR(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, or) #define VEC_DECL_XOR(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, xor) #define VEC_DECL_AVG(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, avg) #define VEC_DECL_SHIFT(sign, bits, size, vectype, way) VEC_OPERATION_THIS_DECL(sign, bits, size, vectype##way##shift, (v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2)) #define VEC_DECL_NOT(sign, bits, size) VEC_OPERATION_THIS_DECL(sign, bits, size, not, (v##sign##int##bits##x##size vec)) /* comparisons */ #define VEC_DECL_CMPLT(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, cmplt) #define VEC_DECL_CMPGT(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, cmpgt) #define VEC_DECL_CMPEQ(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, cmpeq) #define VEC_DECL_CMPLE(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, cmple) #define VEC_DECL_CMPGE(sign, bits, size) VEC_TWOWAY_DECL(sign, bits, size, cmpge) /* Generic variations. */ #define VEC_GENERIC_SPLAT(sign, bits, size) \ VEC_DECL_SPLAT(sign, bits, size) \ { \ sign##int##bits##_t va[size]; \ for (int i = 0; i < size; i++) va[i] = x; \ return v##sign##int##bits##x##size##_load(va); \ } #define VEC_GENERIC_DIVIDE(sign, bits, size) \ VEC_DECL_DIV(sign, bits, size) \ { \ sign##int##bits##_t vec1a[size], vec2a[size]; \ \ v##sign##int##bits##x##size##_store(vec1, vec1a); \ v##sign##int##bits##x##size##_store(vec2, vec2a); \ \ for (int i = 0; i < size; i++) vec1a[i] = (vec2a[i]) ? (vec1a[i] / vec2a[i]) : 0; \ \ return v##sign##int##bits##x##size##_load(vec1a); \ } #define VEC_GENERIC_SHIFT(sign, bits, size, vectype, way) \ VEC_DECL_SHIFT(sign, bits, size, vectype, way) \ { \ sign##int##bits##_t vec1a[size], vec2a[size]; \ \ v##sign##int##bits##x##size##_store(vec1, vec1a); \ vuint##bits##x##size##_store(vec2, vec2a); \ \ for (int i = 0; i < size; i++) vec1a[i] = vec_##sign##vectype##way##shift(vec1a[i], vec2a[i]); \ \ return v##sign##int##bits##x##size##_load(vec1a); \ } #define VEC_GENERIC_SHIFTS(sign, bits, size) \ VEC_GENERIC_SHIFT(sign, bits, size, , l) /* left shift */ \ VEC_GENERIC_SHIFT(sign, bits, size, , r) /* arithmetic right shift */ \ VEC_GENERIC_SHIFT(sign, bits, size, l, r) /* logical right shift */ #define VEC_GENERIC_AVG(sign, bits, size) \ VEC_DECL_AVG(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size##_mul(vec1, vec2), v##sign##int##bits##x##size##_splat(2)); \ } #define VEC_GENERIC_THAN_OR_EQUAL(sign, bits, size) \ VEC_DECL_NOT(sign, bits, size); \ \ VEC_DECL_CMPLE(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmpgt(vec1, vec2)); \ } \ VEC_DECL_CMPGE(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmplt(vec1, vec2)); \ } #define VEC_GENERIC_COMPARISON(sign, bits, size, name, op) \ VEC_DECL_CMP##name(sign, bits, size) \ { \ sign##int##bits##_t vec1a[size], vec2a[size]; \ \ v##sign##int##bits##x##size##_store(vec1, vec1a); \ v##sign##int##bits##x##size##_store(vec2, vec2a); \ \ for (int i = 0; i < size; i++) vec1a[i] = (vec1a[i] op vec2a[i]) ? UINT##bits##_MAX : 0; \ \ return v##sign##int##bits##x##size##_load(vec1a); \ } #define VEC_GENERIC_COMPARISONS(sign, bits, size) \ VEC_GENERIC_COMPARISON(sign, bits, size, LT, <) \ VEC_GENERIC_COMPARISON(sign, bits, size, GT, >) \ VEC_GENERIC_COMPARISON(sign, bits, size, EQ, ==) \ VEC_GENERIC_THAN_OR_EQUAL(sign, bits, size) /* POWER altivec */ #ifdef __ALTIVEC__ # include "impl/altivec.h" #endif /* x86 SSE2 */ #ifdef __SSE2__ # include "impl/sse2.h" #endif #ifdef VEC_HAVE_GNUC_VECTORS # include "impl/gcc.h" #endif #include "impl/generic.h" /* ----------------------------------------------------------------- */ /* bitwise NOT is just an XOR with UINT[BITS]_MAX */ #define DEFINE_NOT_OPERATION(sign, bits, size) \ VEC_DECL_NOT(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_xor(vec, v##sign##int##bits##x##size##_splat(UINT##bits##_MAX)); \ } DEFINE_NOT_OPERATION(, 8, 16) DEFINE_NOT_OPERATION(, 16, 8) DEFINE_NOT_OPERATION(, 32, 4) DEFINE_NOT_OPERATION(, 64, 2) DEFINE_NOT_OPERATION(u, 8, 16) DEFINE_NOT_OPERATION(u, 16, 8) DEFINE_NOT_OPERATION(u, 32, 4) DEFINE_NOT_OPERATION(u, 64, 2) #undef DEFINE_NOT_OPERATION /* ---------------------------------------------------------------- */ /* cleanup */ #undef VEC_OPERATION_DECL #undef VEC_OPERATION_THIS_DECL #undef VEC_TWOWAY_DECL #undef VEC_DECL_SPLAT #undef VEC_DECL_LOAD #undef VEC_DECL_STORE #undef VEC_DECL_ADD #undef VEC_DECL_SUB #undef VEC_DECL_MUL #undef VEC_DECL_DIV #undef VEC_DECL_AND #undef VEC_DECL_OR #undef VEC_DECL_XOR #undef VEC_DECL_AVG #undef VEC_DECL_SHIFT #undef VEC_DECL_NOT #undef VEC_DECL_CMPLT #undef VEC_DECL_CMPGT #undef VEC_DECL_CMPEQ #undef VEC_DECL_CMPLE #undef VEC_DECL_CMPGE #undef VEC_GENERIC_SPLAT #undef VEC_GENERIC_DIVIDE #undef VEC_GENERIC_SHIFT #undef VEC_GENERIC_SHIFTS #undef VEC_GENERIC_AVG #undef VEC_GENERIC_THAN_OR_EQUAL #undef VEC_GENERIC_COMPARISON #undef VEC_GENERIC_COMPARISONS /* ---------------------------------------------------------------- */ #endif /* VEC_VEC_H_ */