Mercurial > vec
view include/vec/impl/generic.h @ 3:3c5545b1568f
*: much better alignment support & tests
author | Paper <paper@tflc.us> |
---|---|
date | Tue, 22 Oct 2024 23:27:15 -0400 |
parents | f12b5dd4e18c |
children | 75ab77f874e2 |
line wrap: on
line source
/** * vec - a tiny SIMD vector library in plain C99 * * Copyright (c) 2024 Paper * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. **/ /* Generic array-based implementation. */ #include <stdint.h> #include <string.h> #define VEC_DEFINE_STRUCT(sign, bits, size) \ typedef struct { \ sign##int##bits##_t arr[size]; \ } v##sign##int##bits##x##size; #define VEC_DEFINE_OPERATIONS(sign, bits, size) \ VEC_DECL_LOAD_ALIGNED(sign, bits, size) \ { \ v##sign##int##bits##x##size vec; \ memcpy(vec.arr, in, sizeof(vec.arr)); \ return vec; \ } \ \ VEC_DECL_LOAD(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_load_aligned(in); \ } \ \ VEC_DECL_STORE_ALIGNED(sign, bits, size) \ { \ memcpy(out, vec.arr, sizeof(vec.arr)); \ } \ \ VEC_DECL_STORE(sign, bits, size) \ { \ return v##sign##int##bits##x##size##_store_aligned(vec, out); \ } \ \ VEC_DECL_ADD(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] += vec2.arr[i]; \ return vec1; \ } \ \ VEC_DECL_SUB(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] -= vec2.arr[i]; \ return vec1; \ } \ \ VEC_DECL_MUL(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] *= vec2.arr[i]; \ return vec1; \ } \ \ VEC_DECL_AND(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] &= vec2.arr[i]; \ return vec1; \ } \ \ VEC_DECL_OR(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] |= vec2.arr[i]; \ return vec1; \ } \ \ VEC_DECL_XOR(sign, bits, size) \ { \ for (int i = 0; i < size; i++) vec1.arr[i] ^= vec2.arr[i]; \ return vec1; \ } \ \ VEC_GENERIC_SPLAT(sign, bits, size) \ VEC_GENERIC_SHIFTS(sign, bits, size) \ VEC_GENERIC_DIVIDE(sign, bits, size) \ VEC_GENERIC_AVG(sign, bits, size) #ifndef VEC_VUINT8X16 # define VEC_VUINT8X16 VEC_DEFINE_STRUCT(u, 8, 16) # define VUINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ ((vuint8x16){ .arr = { a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } }) VEC_DEFINE_OPERATIONS(u, 8, 16) VEC_GENERIC_COMPARISONS(u, 8, 16) #endif #ifndef VEC_VUINT16X8 # define VEC_VUINT16X8 VEC_DEFINE_STRUCT(u, 16, 8) # define VUINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \ ((vuint16x8){ .arr = { a, b, c, d, e, f, g, h } }) VEC_DEFINE_OPERATIONS(u, 16, 8) VEC_GENERIC_COMPARISONS(u, 16, 8) #endif #ifndef VEC_VUINT32X4 # define VEC_VUINT32X4 VEC_DEFINE_STRUCT(u, 32, 4) # define VUINT32x4_CONSTANT(a, b, c, d) \ ((vuint32x4){ .arr = { a, b, c, d } }) VEC_DEFINE_OPERATIONS(u, 32, 4) VEC_GENERIC_COMPARISONS(u, 32, 4) #endif #ifndef VEC_VUINT64X2 # define VEC_VUINT64X2 VEC_DEFINE_STRUCT(u, 64, 2) # define VUINT64x2_CONSTANT(a, b) \ ((vuint64x2){ .arr = { a, b } }) VEC_DEFINE_OPERATIONS(u, 64, 2) VEC_GENERIC_COMPARISONS(u, 64, 2) #endif #ifndef VEC_VINT8X16 # define VEC_VINT8X16 VEC_DEFINE_STRUCT(, 8, 16) # define VINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \ ((vint8x16){ .arr = { a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p } }) VEC_DEFINE_OPERATIONS(, 8, 16) VEC_GENERIC_COMPARISONS(, 8, 16) #endif #ifndef VEC_VINT16X8 # define VEC_VINT16X8 VEC_DEFINE_STRUCT(, 16, 8) # define VINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \ ((vint16x8){ .arr = { a, b, c, d, e, f, g, h } }) VEC_DEFINE_OPERATIONS(, 16, 8) VEC_GENERIC_COMPARISONS(, 16, 8) #endif #ifndef VEC_VINT32X4 # define VEC_VINT32X4 VEC_DEFINE_STRUCT(, 32, 4) # define VINT32x4_CONSTANT(a, b, c, d) \ ((vint32x4){ .arr = { a, b, c, d } }) VEC_DEFINE_OPERATIONS(, 32, 4) VEC_GENERIC_COMPARISONS(, 32, 4) #endif #ifndef VEC_VINT64X2 # define VEC_VINT64X2 VEC_DEFINE_STRUCT(, 64, 2) # define VINT64x2_CONSTANT(a, b) \ ((vint64x2){ .arr = { a, b } }) VEC_DEFINE_OPERATIONS(, 64, 2) VEC_GENERIC_COMPARISONS(, 64, 2) #endif #undef VEC_DEFINE_STRUCT #undef VEC_DEFINE_OPERATIONS