Mercurial > vec
view gen/gengeneric.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
line wrap: on
line source
/**
 * vec - a tiny SIMD vector library in C99
 *
 * Copyright (c) 2024-2025 Paper
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
**/

/*
 * Generator for the "generic" (plain C, element-by-element) backend.
 *
 * Every op_*_pbody() function below prints, to stdout, the C statements that
 * make up the body of one generated vector function. They all share the
 * signature (op, type, bits, size):
 *
 *   op   - one of the OP_* constants; selects the operator text to emit
 *   type - one of TYPE_INT / TYPE_UINT / TYPE_FLOAT
 *   bits - element width in bits (used as bits / 8 for byte counts)
 *   size - number of elements; one statement group is emitted per element
 *
 * The emitted text refers to `vec1`, `vec2`, `vec` and `x`, each with a
 * `.generic[]` member where applicable. NOTE(review): those names are
 * presumably declared by the function header that gen() (genlib.h) prints
 * around each body -- confirm against genlib.
 */

#include "genlib.h"

#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))

/*
 * Emits `vec1.generic[i] = (vec1.generic[i] <op> vec2.generic[i]);` for every
 * element, followed by `return vec1;`. Handles the operators that map
 * directly onto a C binary operator (+ - * & | ^).
 */
static void op_builtin_pbody(int op, int type, int bits, int size)
{
	const char *ops[OP_FINAL_] = {
		[OP_ADD] = "+",
		[OP_SUB] = "-",
		[OP_MUL] = "*",
		[OP_AND] = "&",
		[OP_OR]  = "|",
		[OP_XOR] = "^",
	};
	int i;

	for (i = 0; i < size; i++)
		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n", i, i, ops[op], i);

	puts("\treturn vec1;");

	(void)type;
	(void)bits;
}

/*
 * Emits a per-element average of vec1 and vec2, followed by `return vec1;`.
 *
 *   TYPE_INT   - halves both operands first and then adds a correction built
 *                from the two remainders, so the emitted code never computes
 *                the full sum of the two elements.
 *   TYPE_UINT  - (a >> 1) + (b >> 1) + ((a | b) & 1).
 *   TYPE_FLOAT - (a + b) / 2.
 */
static void op_builtin_avg_pbody(int op, int type, int bits, int size)
{
	int i;

	switch (type) {
	case TYPE_INT:
		/* scratch variables shared by every element's computation */
		printf("\t");
		gen_print_stype(type, bits);
		printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n");

		/*
		 * The literal '%' operators are written as "%%": a bare '%' inside
		 * a printf format starts a conversion specification, and "% 2)" is
		 * not a valid one.
		 */
		for (i = 0; i < size; i++)
			printf(
				"\tx_d_rem = (vec1.generic[%d] %% 2);\n"
				"\ty_d_rem = (vec2.generic[%d] %% 2);\n"
				"\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n"
				"\trem_d_rem = ((x_d_rem + y_d_rem) %% 2);\n"
				"\n"
				"\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n"
				, i, i, i, i, i);
		break;
	case TYPE_UINT:
		for (i = 0; i < size; i++)
			printf("vec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i);
		break;
	case TYPE_FLOAT:
		/* this is probably fine. */
		for (i = 0; i < size; i++)
			printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i);
		break;
	default:
		break;
	}

	printf("\treturn vec1;\n");

	(void)op;
}

/*
 * Emits `vec.generic[i] = ~vec.generic[i];` for every element, followed by
 * `return vec;`.
 */
static void op_builtin_not_pbody(int op, int type, int bits, int size)
{
	int i;

	for (i = 0; i < size; i++)
		printf("\tvec.generic[%d] = ~vec.generic[%d];\n", i, i);

	puts("\treturn vec;");

	(void)op;
	(void)type;
	(void)bits;
}

/*
 * Emits per-element shifts of vec1 by vec2, followed by `return vec1;`.
 *
 *   TYPE_UINT - every shift op maps onto the C `<<=` / `>>=` operators.
 *   TYPE_INT  - OP_LSHIFT and OP_LRSHIFT are performed on the unsigned member
 *               of a local union so the shift itself happens on an unsigned
 *               value; OP_RSHIFT emits a sign-preserving right shift.
 *
 * Any other type emits only the return statement.
 */
static void op_builtin_shift_pbody(int op, int type, int bits, int size)
{
	int i;

	switch (type) {
	case TYPE_UINT: {
		const char *ops[] = {
			[OP_LSHIFT]  = "<<",
			[OP_RSHIFT]  = ">>",
			[OP_LRSHIFT] = ">>",
		};

		/* three conversions in the format, so three arguments */
		for (i = 0; i < size; i++)
			printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op], i);
		break;
	}
	case TYPE_INT: {
		switch (op) {
		case OP_LSHIFT:
		case OP_LRSHIFT: {
			const char *ops[] = {
				[OP_LSHIFT]  = "<<",
				[OP_LRSHIFT] = ">>",
			};

			/* union used to reinterpret the signed element as unsigned */
			printf("\tunion { ");
			gen_print_stype(TYPE_UINT, bits);
			printf(" u; ");
			gen_print_stype(TYPE_INT, bits);
			puts(" s; } x;\n");

			for (i = 0; i < size; i++)
				printf(
					"\tx.s = vec1.generic[%d];\n"
					"\tx.u %s= vec2.generic[%d];\n"
					"\tvec1.generic[%d] = x.s;\n",
					i, ops[op], i, i);
			break;
		}
		case OP_RSHIFT:
			/*
			 * Arithmetic right shift without relying on how the compiler
			 * shifts negative values: a negative x is complemented (making
			 * it non-negative), shifted, and complemented back; a
			 * non-negative x is shifted directly.
			 */
			for (i = 0; i < size; i++)
				printf("vec1.generic[%d] = ((vec1.generic[%d] < 0) ? ~((~vec1.generic[%d]) >> vec2.generic[%d]) : (vec1.generic[%d] >> vec2.generic[%d]));\n", i, i, i, i, i, i);
			break;
		default:
			break;
		}
		break;
	}
	default:
		break;
	}

	puts("\treturn vec1;");
}

/*
 * Emits division / modulo guarded against a zero divisor: each element
 * becomes `vec2[i] ? (vec1[i] <op> vec2[i]) : 0`, followed by `return vec1;`.
 * Floating-point modulo is emitted as a call to fmod() instead of `%`.
 */
static void op_builtin_nonzero_pbody(int op, int type, int bits, int size)
{
	const char *ops[OP_FINAL_] = {
		[OP_DIV] = "/",
		[OP_MOD] = "%",
	};
	int i;

	if (op == OP_MOD && type == TYPE_FLOAT) {
		for (i = 0; i < size; i++)
			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n", i, i, i, i);
	} else {
		/* ops[op] goes through %s, so the "%" operator needs no escaping */
		for (i = 0; i < size; i++)
			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n", i, i, i, ops[op], i);
	}

	puts("\treturn vec1;");

	(void)bits;
}

/*
 * Emits per-element comparisons, followed by `return vec1;`. Each element of
 * vec1 is overwritten with memset(): every one of its bits / 8 bytes is set
 * to 0xFF when the comparison holds and to 0 otherwise.
 */
static void op_cmp_pbody(int op, int type, int bits, int size)
{
	const char *ops[OP_FINAL_] = {
		[OP_CMPLT] = "<",
		[OP_CMPLE] = "<=",
		[OP_CMPEQ] = "==",
		[OP_CMPGE] = ">=",
		[OP_CMPGT] = ">",
	};
	int i;

	/* this is portable for int uint and float*/
	for (i = 0; i < size; i++)
		printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n", i, i, ops[op], i, bits / 8);

	puts("\treturn vec1;");

	(void)type;
}

/*
 * Emits per-element min/max as `(a <op> b) ? a : b`, with `<` for OP_MIN and
 * `>` for OP_MAX, followed by `return vec1;`.
 */
static void op_minmax_pbody(int op, int type, int bits, int size)
{
	const char *ops[OP_FINAL_] = {
		[OP_MIN] = "<",
		[OP_MAX] = ">",
	};
	int i;

	for (i = 0; i < size; i++)
		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? (vec1.generic[%d]) : (vec2.generic[%d]);\n", i, i, ops[op], i, i, i);

	puts("\treturn vec1;");

	(void)type;
	(void)bits;
}

/*
 * Emits a local vector declaration `vec`, assigns `x` to each of its
 * elements, and returns it.
 */
static void op_splat_pbody(int op, int type, int bits, int size)
{
	int i;

	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	for (i = 0; i < size; i++)
		printf("\tvec.generic[%d] = x;\n", i);

	puts("\treturn vec;");

	(void)op;
}

/*
 * Emits a local vector declaration `vec`, a memcpy() of (bits / 8) * size
 * bytes from `x` into `vec.generic`, and `return vec;`. Used for both the
 * aligned and the unaligned load ops (see op_impl below).
 */
static void op_load_pbody(int op, int type, int bits, int size)
{
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	printf("\tmemcpy(vec.generic, x, %d);\n", (bits / 8) * size);

	puts("\treturn vec;");

	(void)op;
}

/*
 * Emits a memcpy() of (bits / 8) * size bytes from `vec.generic` into `x`.
 * No return statement is emitted. Used for both the aligned and the
 * unaligned store ops (see op_impl below).
 */
static void op_store_pbody(int op, int type, int bits, int size)
{
	printf("\tmemcpy(x, vec.generic, %d);\n", (bits / 8) * size);

	(void)op;
	(void)type;
}

/* ------------------------------------------------------------------------ */

/*
 * Dispatch table indexed by OP_*. Only the third member (the body printer)
 * is populated for this backend. NOTE(review): the meaning of the first two
 * members is defined by struct op_impl in genlib.h, which is not visible
 * here.
 */
static struct op_impl op_impl[OP_FINAL_] = {
	[OP_SPLAT] = {NULL, NULL, op_splat_pbody},
	[OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody},
	[OP_LOAD] = {NULL, NULL, op_load_pbody},
	[OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody},
	[OP_STORE] = {NULL, NULL, op_store_pbody},

	/* arithmetic */
	[OP_ADD] = {NULL, NULL, op_builtin_pbody},
	[OP_SUB] = {NULL, NULL, op_builtin_pbody},
	[OP_MUL] = {NULL, NULL, op_builtin_pbody},
	[OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody},
	[OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody},
	[OP_AVG] = {NULL, NULL, op_builtin_avg_pbody},

	/* bitwise */
	[OP_AND] = {NULL, NULL, op_builtin_pbody},
	[OP_OR] = {NULL, NULL, op_builtin_pbody},
	[OP_XOR] = {NULL, NULL, op_builtin_pbody},
	[OP_NOT] = {NULL, NULL, op_builtin_not_pbody},

	/* min/max */
	[OP_MIN] = {NULL, NULL, op_minmax_pbody},
	[OP_MAX] = {NULL, NULL, op_minmax_pbody},

	/* bitshift */
	[OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
	[OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
	[OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody},

	/* comparison */
	[OP_CMPLT] = {NULL, NULL, op_cmp_pbody},
	[OP_CMPLE] = {NULL, NULL, op_cmp_pbody},
	[OP_CMPEQ] = {NULL, NULL, op_cmp_pbody},
	[OP_CMPGE] = {NULL, NULL, op_cmp_pbody},
	[OP_CMPGT] = {NULL, NULL, op_cmp_pbody},
};

/* Hands the table to gen() (genlib.h) under the backend name "generic". */
int main(void)
{
	gen(op_impl, "generic");

	return 0;
}