Mercurial > vec
view include/vec/vec.h @ 33:4655b49eaf9f
Backed out changeset 6c91cd9a2f2d
author | Paper <paper@tflc.us> |
---|---|
date | Fri, 25 Apr 2025 17:40:42 -0400 |
parents | bf6ad516f1e6 |
children | 8b5e0974fd41 |
line wrap: on
line source
/** * vec - a tiny SIMD vector library in C99 * * Copyright (c) 2024 Paper * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. **/ #ifndef VEC_VEC_H_ #define VEC_VEC_H_ #ifdef __cplusplus extern "C" { #endif // different on every implementation #include "vec/types.h" #define VEC_SEMVER_ATLEAST(a, b, c, x, y, z) \ (((a) >= (x)) && \ ((a) > x || (b) >= (y)) && \ ((a) > x || (b) > (y) || (c) >= (z))) // MSVC sucks and its a pain in the ass to find out this stuff #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) # define VEC_MSVC_VERSION_MAJOR (_MSC_FULL_VER / 10000000) # define VEC_MSVC_VERSION_MINOR ((_MSC_FULL_VER % 10000000) / 100000) # define VEC_MSVC_VERSION_PATCH (_MSC_FULL_VER % 100000) #elif defined(_MSC_FULL_VER) # define VEC_MSVC_VERSION_MAJOR (_MSC_FULL_VER / 1000000) # define VEC_MSVC_VERSION_MINOR ((_MSC_FULL_VER % 1000000) / 10000) # define VEC_MSVC_VERSION_PATCH (_MSC_FULL_VER % 10000) #elif defined(_MSC_VER) # define VEC_MSVC_VERSION_MAJOR (_MSC_VER / 100) # define VEC_MSVC_VERSION_MINOR (_MSC_VER % 100) # define VEC_MSVC_VERSION_PATCH (0) #endif #ifdef VEC_MSVC_VERSION_MAJOR # define VEC_MSVC_ATLEAST(x, y, z) \ VEC_SEMVER_ATLEAST(VEC_MSVC_VERSION_MAJOR, VEC_MSVC_VERSION_MINOR, VEC_MSVC_VERSION_PATCH, x, y, z) #else # define VEC_MSVC_ATLEAST(x, y, z) (0) #endif // now we get to GNU C stuff (not necessarily GCC) #ifdef __GNUC__ # define VEC_GNUC_ATLEAST(x, y, z) \ VEC_SEMVER_ATLEAST(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, x, y, z) #else # define VEC_GNUC_ATLEAST(x, y, z) (0) #endif #if defined(__has_attribute) # define VEC_GNUC_HAS_ATTRIBUTE(x, major, minor, patch) __has_attribute(x) #else # define VEC_GNUC_HAS_ATTRIBUTE(x, major, minor, patch) VEC_GNUC_ATLEAST(major, minor, patch) #endif // this isn't used anywhere (yet!) but still useful to have #if (__cplusplus >= 201103L) || (__STDC_VERSION__ >= 202311L) # define VEC_STATIC_ASSERT(x, msg) static_assert(x, msg) #elif (__STDC_VERSION__ >= 201112L) # define VEC_STATIC_ASSERT(x, msg) _Static_assert(x, msg) #else # define VEC_STATIC_ASSERT(x, msg) \ extern int (*vec_impl_Static_assert_function_(void)) \ [!!sizeof (struct { int __error_if_negative: (x) ? 2 : -1; })] #endif ////////////////////////////////////////////////////////////////////////////// // Detect compiler SIMD support // Current known alignments for each implementation, ordered by // architecture and instruction set: // // /---------------------------------------------------\ // | Architecture | Instruction Set | Bits | Alignment | // |---------------------------------------------------| // | ARM | NEON | 64 | 8 bytes | // | ARM | NEON | 128 | 16 bytes | // | PowerPC | AltiVec | 128 | 16 bytes | // | x86 | MMX | 64 | None? | // | x86 | SSE2 | 128 | 16 bytes | // | x86 | AVX2 | 256 | 32 bytes | // | x86 | AVX512-F | 512 | 64 bytes | // \---------------------------------------------------/ // // If these ever have to be extended or changed, there absolutely *must* // be a new major release of vec, since that would change the ABI... #define VINT8x2_ALIGNMENT 2 #define VUINT8x2_ALIGNMENT 2 #define VINT8x4_ALIGNMENT 4 #define VINT16x2_ALIGNMENT 4 #define VUINT8x4_ALIGNMENT 4 #define VUINT16x2_ALIGNMENT 4 // 64-bit #define VINT8x8_ALIGNMENT 8 #define VINT16x4_ALIGNMENT 8 #define VINT32x2_ALIGNMENT 8 #define VUINT8x8_ALIGNMENT 8 #define VUINT16x4_ALIGNMENT 8 #define VUINT32x2_ALIGNMENT 8 // 128-bit #define VINT8x16_ALIGNMENT 16 #define VINT16x8_ALIGNMENT 16 #define VINT32x4_ALIGNMENT 16 #define VINT64x2_ALIGNMENT 16 #define VUINT8x16_ALIGNMENT 16 #define VUINT16x8_ALIGNMENT 16 #define VUINT32x4_ALIGNMENT 16 #define VUINT64x2_ALIGNMENT 16 // 256-bit #define VINT8x32_ALIGNMENT 32 #define VINT16x16_ALIGNMENT 32 #define VINT32x8_ALIGNMENT 32 #define VINT64x4_ALIGNMENT 32 #define VUINT8x32_ALIGNMENT 32 #define VUINT16x16_ALIGNMENT 32 #define VUINT32x8_ALIGNMENT 32 #define VUINT64x4_ALIGNMENT 32 // 512-bit #define VINT8x64_ALIGNMENT 64 #define VINT16x32_ALIGNMENT 64 #define VINT32x16_ALIGNMENT 64 #define VINT64x8_ALIGNMENT 64 #define VUINT8x64_ALIGNMENT 64 #define VUINT16x32_ALIGNMENT 64 #define VUINT32x16_ALIGNMENT 64 #define VUINT64x8_ALIGNMENT 64 ////////////////////////////////////////////////////////////////////////////// // portable bit shift // these functions aren't very necessary :/ inline vec_uintmax vec_lrshift(vec_uintmax x, unsigned int y) { return x >> y; } inline vec_uintmax vec_llshift(vec_uintmax x, unsigned int y) { return x << y; } inline vec_uintmax vec_urshift(vec_uintmax x, unsigned int y) { return x >> y; } inline vec_uintmax vec_ulshift(vec_uintmax x, unsigned int y) { return x << y; } /** * Arithmetic shifts; based off code from OpenMPT, which is under * the Boost Software License: * * Permission is hereby granted, free of charge, to any person or organization * obtaining a copy of the software and accompanying documentation covered by * this license (the "Software") to use, reproduce, display, distribute, * execute, and transmit the Software, and to prepare derivative works of the * Software, and to permit third-parties to whom the Software is furnished to * do so, all subject to the following: * * The copyright notices in the Software and this entire statement, including * the above license grant, this restriction and the following disclaimer, * must be included in all copies of the Software, in whole or in part, and * all derivative works of the Software, unless such copies or derivative * works are solely in the form of machine-executable object code generated by * a source language processor. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. **/ inline vec_intmax vec_rshift(vec_intmax x, unsigned int y) { static const vec_uintmax roffset = ((vec_uintmax)1) << ((sizeof(vec_intmax) * 8) - 1); union { vec_intmax d; vec_uintmax u; } xx; xx.d = x; xx.u += roffset; xx.u >>= y; xx.u -= roffset >> y; return xx.d; } inline vec_intmax vec_lshift(vec_intmax x, unsigned int y) { static const vec_uintmax roffset = ((vec_uintmax)1) << ((sizeof(vec_intmax) * 8) - 1); union { vec_intmax d; vec_uintmax u; } xx; xx.d = x; xx.u += roffset; xx.u <<= y; xx.u -= roffset << y; return xx.d; } inline vec_intmax vec_avg(vec_intmax x, vec_intmax y) { if ((x < 0) == (y < 0)) { // same sign // this gets the equivalent of: // vec_int32 r = ((vec_int64)x + (vec_int64)y) / 2; vec_intmax r = (x / 2) + (y / 2) + (((x % 2) + (y % 2)) / 2); // FIXME emulate AltiVec quirks return r; } else { vec_intmax r = (x + y) / 2; // FIXME emulate AltiVec quirks return r; } } inline vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y) { return (x / 2) + (y / 2) + ((x | y) & 1); } ////////////////////////////////////////////////////////////////////////////// // array alignment #if (__cplusplus >= 201103L) || (__STDC_VERSION__ >= 202311L) # define VEC_ALIGNOF(type) alignof(type) #elif (__STDC_VERSION__ >= 201112L) # define VEC_ALIGNOF(type) _Alignof(type) #elif defined(HAVE_STDDEF_H) // already included # define VEC_ALIGNOF(type) \ (offsetof(struct { char slot1; type slot2; }, slot2)) #else // inline offsetof # define VEC_ALIGNOF(type) \ ((vec_uintsize)((char *)&((struct { char slot1; type slot2; } *)0)->slot2 - (char *)0)) #endif #if (__cplusplus >= 201103L) || (__STDC_VERSION__ >= 202311L) # define VEC_ALIGNAS(x) alignas(x) #elif (__STDC_VERSION__ >= 201112L) # define VEC_ALIGNAS(x) _Alignas(x) #elif VEC_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0) # define VEC_ALIGNAS(x) __attribute__((__aligned__(x))) #elif VEC_MSVC_ATLEAST(0, 0, 0) // FIXME which version? # define VEC_ALIGNAS(x) __declspec(align(x)) #else # error vec: vec requires compiler alignment support #endif // this wart is here because originally vec didn't require that // there be compiler support for alignment. now that we *do*, // we should at least keep providing this macro... #ifdef VEC_ALIGNAS # define VEC_ALIGNED_ARRAY(type, var, length, align) \ VEC_ALIGNAS(align) type var[length] # define VEC_ALIGNED_ARRAY_SIZEOF(var, align) \ (sizeof(var)) #endif #define VEC_ALIGNED_ARRAY_LENGTH(var) \ (VEC_ALIGNED_ARRAY_SIZEOF(var)/sizeof(*var)) ////////////////////////////////////////////////////////////////////////////////////// // predefined variants for each vector type ////////////////////////////////////////////////////////////////////////////////////// // 16-bit #define VINT8x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 2, VINT8x2_ALIGNMENT) #define VINT8x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x2_ALIGNMENT) #define VINT8x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x2_ALIGNMENT) #define VINT8x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x2_ALIGNMENT == 0) #define VUINT8x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 2, VUINT8x2_ALIGNMENT) #define VUINT8x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x2_ALIGNMENT) #define VUINT8x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x2_ALIGNMENT) #define VUINT8x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x2_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////////////// // 32-bit #define VINT8x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 4, VINT8x4_ALIGNMENT) #define VINT8x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x4_ALIGNMENT) #define VINT8x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x4_ALIGNMENT) #define VINT8x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x4_ALIGNMENT == 0) #define VINT16x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int16, var, 2, VINT16x2_ALIGNMENT) #define VINT16x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x2_ALIGNMENT) #define VINT16x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x2_ALIGNMENT) #define VINT16x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x2_ALIGNMENT == 0) #define VUINT8x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 4, VUINT8x4_ALIGNMENT) #define VUINT8x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x4_ALIGNMENT) #define VUINT8x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x4_ALIGNMENT) #define VUINT8x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x4_ALIGNMENT == 0) #define VUINT16x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint16, var, 2, VUINT16x2_ALIGNMENT) #define VUINT16x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x2_ALIGNMENT) #define VUINT16x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x2_ALIGNMENT) #define VUINT16x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x2_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////////////// // 64-bit #define VINT8x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 8, VINT8x8_ALIGNMENT) #define VINT8x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x8_ALIGNMENT) #define VINT8x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x8_ALIGNMENT) #define VINT8x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x8_ALIGNMENT == 0) #define VINT16x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int16, var, 4, VINT16x4_ALIGNMENT) #define VINT16x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x4_ALIGNMENT) #define VINT16x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x4_ALIGNMENT) #define VINT16x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x4_ALIGNMENT == 0) #define VINT32x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int32, var, 2, VINT32x2_ALIGNMENT) #define VINT32x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT32x2_ALIGNMENT) #define VINT32x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT32x2_ALIGNMENT) #define VINT32x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT32x2_ALIGNMENT == 0) #define VUINT8x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 8, VUINT8x8_ALIGNMENT) #define VUINT8x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x8_ALIGNMENT) #define VUINT8x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x8_ALIGNMENT) #define VUINT8x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x8_ALIGNMENT == 0) #define VUINT16x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint16, var, 4, VUINT16x4_ALIGNMENT) #define VUINT16x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x4_ALIGNMENT) #define VUINT16x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x4_ALIGNMENT) #define VUINT16x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x4_ALIGNMENT == 0) #define VUINT32x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint32, var, 2, VUINT32x2_ALIGNMENT) #define VUINT32x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT32x2_ALIGNMENT) #define VUINT32x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT32x2_ALIGNMENT) #define VUINT32x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT32x2_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////////////// // 128-bit #define VINT8x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 16, VINT8x16_ALIGNMENT) #define VINT8x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x16_ALIGNMENT) #define VINT8x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x16_ALIGNMENT) #define VINT8x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x16_ALIGNMENT == 0) #define VINT16x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int16, var, 8, VINT16x8_ALIGNMENT) #define VINT16x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x8_ALIGNMENT) #define VINT16x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x8_ALIGNMENT) #define VINT16x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x8_ALIGNMENT == 0) #define VINT32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int32, var, 4, VINT32x4_ALIGNMENT) #define VINT32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT32x4_ALIGNMENT) #define VINT32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT32x4_ALIGNMENT) #define VINT32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT32x4_ALIGNMENT == 0) #define VINT64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int64, var, 2, VINT64x2_ALIGNMENT) #define VINT64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT64x2_ALIGNMENT) #define VINT64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT64x2_ALIGNMENT) #define VINT64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT64x2_ALIGNMENT == 0) #define VUINT8x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 16, VUINT8x16_ALIGNMENT) #define VUINT8x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x16_ALIGNMENT) #define VUINT8x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x16_ALIGNMENT) #define VUINT8x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x16_ALIGNMENT == 0) #define VUINT16x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint16, var, 8, VUINT16x8_ALIGNMENT) #define VUINT16x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x8_ALIGNMENT) #define VUINT16x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x8_ALIGNMENT) #define VUINT16x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x8_ALIGNMENT == 0) #define VUINT32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint32, var, 4, VUINT32x4_ALIGNMENT) #define VUINT32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT32x4_ALIGNMENT) #define VUINT32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT32x4_ALIGNMENT) #define VUINT32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT32x4_ALIGNMENT == 0) #define VUINT64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint64, var, 2, VUINT64x2_ALIGNMENT) #define VUINT64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT64x2_ALIGNMENT) #define VUINT64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT64x2_ALIGNMENT) #define VUINT64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x2_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////////////// // 256-bit #define VINT8x32_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 32, VINT8x32_ALIGNMENT) #define VINT8x32_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x32_ALIGNMENT) #define VINT8x32_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x32_ALIGNMENT) #define VINT8x32_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x32_ALIGNMENT == 0) #define VINT16x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int16, var, 16, VINT16x16_ALIGNMENT) #define VINT16x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x16_ALIGNMENT) #define VINT16x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x16_ALIGNMENT) #define VINT16x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x16_ALIGNMENT == 0) #define VINT32x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int32, var, 8, VINT32x8_ALIGNMENT) #define VINT32x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT32x8_ALIGNMENT) #define VINT32x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT32x8_ALIGNMENT) #define VINT32x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT32x8_ALIGNMENT == 0) #define VINT64x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int64, var, 4, VINT64x4_ALIGNMENT) #define VINT64x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT64x4_ALIGNMENT) #define VINT64x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT64x4_ALIGNMENT) #define VINT64x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT64x4_ALIGNMENT == 0) #define VUINT8x32_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 32, VUINT8x32_ALIGNMENT) #define VUINT8x32_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x32_ALIGNMENT) #define VUINT8x32_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x32_ALIGNMENT) #define VUINT8x32_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x32_ALIGNMENT == 0) #define VUINT16x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint16, var, 16, VUINT16x16_ALIGNMENT) #define VUINT16x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x16_ALIGNMENT) #define VUINT16x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x16_ALIGNMENT) #define VUINT16x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x16_ALIGNMENT == 0) #define VUINT32x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint32, var, 8, VUINT32x8_ALIGNMENT) #define VUINT32x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT32x8_ALIGNMENT) #define VUINT32x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT32x8_ALIGNMENT) #define VUINT32x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT32x8_ALIGNMENT == 0) #define VUINT64x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint64, var, 4, VUINT64x4_ALIGNMENT) #define VUINT64x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT64x4_ALIGNMENT) #define VUINT64x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT64x4_ALIGNMENT) #define VUINT64x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x4_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////////////// // 512-bit #define VINT8x64_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int8, var, 64, VINT8x64_ALIGNMENT) #define VINT8x64_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT8x64_ALIGNMENT) #define VINT8x64_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT8x64_ALIGNMENT) #define VINT8x64_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT8x64_ALIGNMENT == 0) #define VINT16x32_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int16, var, 32, VINT16x32_ALIGNMENT) #define VINT16x32_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT16x32_ALIGNMENT) #define VINT16x32_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT16x32_ALIGNMENT) #define VINT16x32_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT16x16_ALIGNMENT == 0) #define VINT32x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int32, var, 16, VINT32x16_ALIGNMENT) #define VINT32x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT32x16_ALIGNMENT) #define VINT32x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT32x16_ALIGNMENT) #define VINT32x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT32x16_ALIGNMENT == 0) #define VINT64x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_int64, var, 8, VINT64x8_ALIGNMENT) #define VINT64x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VINT64x8_ALIGNMENT) #define VINT64x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VINT64x8_ALIGNMENT) #define VINT64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VINT64x8_ALIGNMENT == 0) #define VUINT8x64_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint8, var, 64, VUINT8x64_ALIGNMENT) #define VUINT8x64_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT8x64_ALIGNMENT) #define VUINT8x64_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT8x64_ALIGNMENT) #define VUINT8x64_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT8x64_ALIGNMENT == 0) #define VUINT16x32_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint16, var, 32, VUINT16x32_ALIGNMENT) #define VUINT16x32_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT16x32_ALIGNMENT) #define VUINT16x32_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT16x32_ALIGNMENT) #define VUINT16x32_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT16x16_ALIGNMENT == 0) #define VUINT32x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint32, var, 16, VUINT32x16_ALIGNMENT) #define VUINT32x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT32x16_ALIGNMENT) #define VUINT32x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT32x16_ALIGNMENT) #define VUINT32x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT32x16_ALIGNMENT == 0) #define VUINT64x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_uint64, var, 8, VUINT64x8_ALIGNMENT) #define VUINT64x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VUINT64x8_ALIGNMENT) #define VUINT64x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VUINT64x8_ALIGNMENT) #define VUINT64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x8_ALIGNMENT == 0) ////////////////////////////////////////////////////////////////////////////// // Defines the structures for each vector type // 16-bit typedef struct { VEC_ALIGNAS(VUINT8x2_ALIGNMENT) vec_uint8 bytes[2]; } vuint8x2; typedef struct { VEC_ALIGNAS(VINT8x2_ALIGNMENT) vec_uint8 bytes[2]; } vint8x2; // 32-bit typedef struct { VEC_ALIGNAS(VUINT8x4_ALIGNMENT) vec_uint8 bytes[4]; } vuint8x4; typedef struct { VEC_ALIGNAS(VUINT16x2_ALIGNMENT) vec_uint8 bytes[4]; } vuint16x2; typedef struct { VEC_ALIGNAS(VINT8x4_ALIGNMENT) vec_uint8 bytes[4]; } vint8x4; typedef struct { VEC_ALIGNAS(VINT16x2_ALIGNMENT) vec_uint8 bytes[4]; } vint16x2; // 64-bit typedef struct { VEC_ALIGNAS(VUINT8x8_ALIGNMENT) vec_uint8 bytes[8]; } vuint8x8; typedef struct { VEC_ALIGNAS(VUINT16x4_ALIGNMENT) vec_uint8 bytes[8]; } vuint16x4; typedef struct { VEC_ALIGNAS(VUINT32x2_ALIGNMENT) vec_uint8 bytes[8]; } vuint32x2; typedef struct { VEC_ALIGNAS(VINT8x8_ALIGNMENT) vec_uint8 bytes[8]; } vint8x8; typedef struct { VEC_ALIGNAS(VINT16x4_ALIGNMENT) vec_uint8 bytes[8]; } vint16x4; typedef struct { VEC_ALIGNAS(VINT32x2_ALIGNMENT) vec_uint8 bytes[8]; } vint32x2; // 128-bit typedef union { VEC_ALIGNAS(VUINT8x16_ALIGNMENT) vec_uint8 bytes[16]; } vuint8x16; typedef union { VEC_ALIGNAS(VUINT16x8_ALIGNMENT) vec_uint8 bytes[16]; } vuint16x8; typedef union { VEC_ALIGNAS(VUINT32x4_ALIGNMENT) vec_uint8 bytes[16]; } vuint32x4; typedef union { VEC_ALIGNAS(VUINT64x2_ALIGNMENT) vec_uint8 bytes[16]; } vuint64x2; typedef union { VEC_ALIGNAS(VINT8x16_ALIGNMENT) vec_uint8 bytes[16]; } vint8x16; typedef union { VEC_ALIGNAS(VINT16x8_ALIGNMENT) vec_uint8 bytes[16]; } vint16x8; typedef union { VEC_ALIGNAS(VINT32x4_ALIGNMENT) vec_uint8 bytes[16]; } vint32x4; typedef union { VEC_ALIGNAS(VINT64x2_ALIGNMENT) vec_uint8 bytes[16]; } vint64x2; // 256-bit typedef union { VEC_ALIGNAS(VUINT8x32_ALIGNMENT) vec_uint8 bytes[32]; } vuint8x32; typedef union { VEC_ALIGNAS(VUINT16x16_ALIGNMENT) vec_uint8 bytes[32]; } vuint16x16; typedef union { VEC_ALIGNAS(VUINT32x8_ALIGNMENT) vec_uint8 bytes[32]; } vuint32x8; typedef union { VEC_ALIGNAS(VUINT64x4_ALIGNMENT) vec_uint8 bytes[32]; } vuint64x4; typedef union { VEC_ALIGNAS(VINT8x32_ALIGNMENT) vec_uint8 bytes[32]; } vint8x32; typedef union { VEC_ALIGNAS(VINT16x16_ALIGNMENT) vec_uint8 bytes[32]; } vint16x16; typedef union { VEC_ALIGNAS(VINT32x8_ALIGNMENT) vec_uint8 bytes[32]; } vint32x8; typedef union { VEC_ALIGNAS(VINT64x4_ALIGNMENT) vec_uint8 bytes[32]; } vint64x4; // 512-bit typedef union { VEC_ALIGNAS(VUINT8x64_ALIGNMENT) vec_uint8 bytes[64]; } vuint8x64; typedef union { VEC_ALIGNAS(VUINT16x32_ALIGNMENT) vec_uint8 bytes[64]; } vuint16x32; typedef union { VEC_ALIGNAS(VUINT32x16_ALIGNMENT) vec_uint8 bytes[64]; } vuint32x16; typedef union { VEC_ALIGNAS(VUINT64x8_ALIGNMENT) vec_uint8 bytes[64]; } vuint64x8; typedef union { VEC_ALIGNAS(VINT8x64_ALIGNMENT) vec_uint8 bytes[64]; } vint8x64; typedef union { VEC_ALIGNAS(VINT16x32_ALIGNMENT) vec_uint8 bytes[64]; } vint16x32; typedef union { VEC_ALIGNAS(VINT32x16_ALIGNMENT) vec_uint8 bytes[64]; } vint32x16; typedef union { VEC_ALIGNAS(VINT64x8_ALIGNMENT) vec_uint8 bytes[64]; } vint64x8; // --------------------------------------------------------------------------------- // function declarations int vec_init(void); ////////////////////////////////////////////////////////////////////////////// // these are, for the most part, meant to be used internally // okay, these are filled in for each supported backend. // `and', `or', `xor', and `nor' have to be prefixed with // `b' because of <iso646.h>/cxxisms #define VEC_DEFINE_IMPL_STRUCT_SIGN(sign, bits, size) \ typedef struct { \ v##sign##int##bits##x##size (*splat)(vec_##sign##int##bits x); \ v##sign##int##bits##x##size (*load_aligned)(const vec_##sign##int##bits in[size]); \ v##sign##int##bits##x##size (*load)(const vec_##sign##int##bits in[size]); \ void (*store_aligned)(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \ void (*store)(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \ v##sign##int##bits##x##size (*add)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*sub)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*mul)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*div)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*avg)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*band)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*bor)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*bxor)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*bnot)(v##sign##int##bits##x##size vec); \ v##sign##int##bits##x##size (*lshift)(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ v##sign##int##bits##x##size (*rshift)(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ v##sign##int##bits##x##size (*lrshift)(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ v##sign##int##bits##x##size (*cmplt)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*cmple)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*cmpeq)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*cmpge)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ v##sign##int##bits##x##size (*cmpgt)(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ } v##sign##int##bits##x##size##_impl; #define VEC_DEFINE_IMPL_STRUCT(bits, size) \ VEC_DEFINE_IMPL_STRUCT_SIGN( , bits, size) \ VEC_DEFINE_IMPL_STRUCT_SIGN(u, bits, size) // 16-bit VEC_DEFINE_IMPL_STRUCT(8, 2) // 32-bit VEC_DEFINE_IMPL_STRUCT(8, 4) VEC_DEFINE_IMPL_STRUCT(16, 2) // 64-bit VEC_DEFINE_IMPL_STRUCT(8, 8) VEC_DEFINE_IMPL_STRUCT(16, 4) VEC_DEFINE_IMPL_STRUCT(32, 2) // 128-bit VEC_DEFINE_IMPL_STRUCT(8, 16) VEC_DEFINE_IMPL_STRUCT(16, 8) VEC_DEFINE_IMPL_STRUCT(32, 4) VEC_DEFINE_IMPL_STRUCT(64, 2) // 256-bit VEC_DEFINE_IMPL_STRUCT(8, 32) VEC_DEFINE_IMPL_STRUCT(16, 16) VEC_DEFINE_IMPL_STRUCT(32, 8) VEC_DEFINE_IMPL_STRUCT(64, 4) // 512-bit VEC_DEFINE_IMPL_STRUCT(8, 64) VEC_DEFINE_IMPL_STRUCT(16, 32) VEC_DEFINE_IMPL_STRUCT(32, 16) VEC_DEFINE_IMPL_STRUCT(64, 8) #undef VEC_DEFINE_IMPL_STRUCT #undef VEC_DEFINE_IMPL_STRUCT_SIGN // 16-bit extern const vint8x2_impl *vint8x2_impl_cpu; extern const vuint8x2_impl *vuint8x2_impl_cpu; // 32-bit extern const vint8x4_impl *vint8x4_impl_cpu; extern const vuint8x4_impl *vuint8x4_impl_cpu; extern const vint16x2_impl *vint16x2_impl_cpu; extern const vuint16x2_impl *vuint16x2_impl_cpu; // 64-bit extern const vint8x8_impl *vint8x8_impl_cpu; extern const vuint8x8_impl *vuint8x8_impl_cpu; extern const vint16x4_impl *vint16x4_impl_cpu; extern const vuint16x4_impl *vuint16x4_impl_cpu; extern const vint32x2_impl *vint32x2_impl_cpu; extern const vuint32x2_impl *vuint32x2_impl_cpu; // 128-bit extern const vint8x16_impl *vint8x16_impl_cpu; extern const vuint8x16_impl *vuint8x16_impl_cpu; extern const vint16x8_impl *vint16x8_impl_cpu; extern const vuint16x8_impl *vuint16x8_impl_cpu; extern const vint32x4_impl *vint32x4_impl_cpu; extern const vuint32x4_impl *vuint32x4_impl_cpu; extern const vint64x2_impl *vint64x2_impl_cpu; extern const vuint64x2_impl *vuint64x2_impl_cpu; // 256-bit extern const vint8x32_impl *vint8x32_impl_cpu; extern const vuint8x32_impl *vuint8x32_impl_cpu; extern const vint16x16_impl *vint16x16_impl_cpu; extern const vuint16x16_impl *vuint16x16_impl_cpu; extern const vint32x8_impl *vint32x8_impl_cpu; extern const vuint32x8_impl *vuint32x8_impl_cpu; extern const vint64x4_impl *vint64x4_impl_cpu; extern const vuint64x4_impl *vuint64x4_impl_cpu; // 512-bit extern const vint8x64_impl *vint8x64_impl_cpu; extern const vuint8x64_impl *vuint8x64_impl_cpu; extern const vint16x32_impl *vint16x32_impl_cpu; extern const vuint16x32_impl *vuint16x32_impl_cpu; extern const vint32x16_impl *vint32x16_impl_cpu; extern const vuint32x16_impl *vuint32x16_impl_cpu; extern const vint64x8_impl *vint64x8_impl_cpu; extern const vuint64x8_impl *vuint64x8_impl_cpu; ////////////////////////////////////////////////////////////////////////////// // declared as inline for ! performance : ) #define VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size) \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \ { \ return v##sign##int##bits##x##size##_impl_cpu->splat(x); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load_aligned(const vec_##sign##int##bits in[size]) \ { \ return v##sign##int##bits##x##size##_impl_cpu->load_aligned(in); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]) \ { \ return v##sign##int##bits##x##size##_impl_cpu->load(in); \ } \ \ inline void v##sign##int##bits##x##size##_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \ { \ v##sign##int##bits##x##size##_impl_cpu->store_aligned(vec, out); \ } \ \ inline void v##sign##int##bits##x##size##_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \ { \ return v##sign##int##bits##x##size##_impl_cpu->store(vec, out); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->add(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->sub(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->mul(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->div(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->avg(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->band(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->bor(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->bxor(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \ { \ return v##sign##int##bits##x##size##_impl_cpu->bnot(vec); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->cmplt(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->cmple(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->cmpeq(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->cmpge(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->cmpgt(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->lshift(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->rshift(vec1, vec2); \ } \ \ inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ { \ return v##sign##int##bits##x##size##_impl_cpu->lrshift(vec1, vec2); \ } #define VEC_DEFINE_OPERATIONS(bits, size) \ VEC_DEFINE_OPERATIONS_SIGN( , bits, size) \ VEC_DEFINE_OPERATIONS_SIGN(u, bits, size) // 16-bit VEC_DEFINE_OPERATIONS(8, 2) // 32-bit VEC_DEFINE_OPERATIONS(8, 4) VEC_DEFINE_OPERATIONS(16, 2) // 64-bit VEC_DEFINE_OPERATIONS(8, 8) VEC_DEFINE_OPERATIONS(16, 4) VEC_DEFINE_OPERATIONS(32, 2) // 128-bit VEC_DEFINE_OPERATIONS(8, 16) VEC_DEFINE_OPERATIONS(16, 8) VEC_DEFINE_OPERATIONS(32, 4) VEC_DEFINE_OPERATIONS(64, 2) // 256-bit VEC_DEFINE_OPERATIONS(8, 32) VEC_DEFINE_OPERATIONS(16, 16) VEC_DEFINE_OPERATIONS(32, 8) VEC_DEFINE_OPERATIONS(64, 4) // 512-bit VEC_DEFINE_OPERATIONS(8, 64) VEC_DEFINE_OPERATIONS(16, 32) VEC_DEFINE_OPERATIONS(32, 16) VEC_DEFINE_OPERATIONS(64, 8) #undef VEC_DEFINE_OPERATIONS #undef VEC_DEFINE_OPERATIONS_SIGN #ifdef __cplusplus } #endif #endif /* VEC_VEC_H_ */