Mercurial > vec
diff gen/gengeneric.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/gengeneric.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,274 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +static void op_builtin_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_ADD] = "+", + [OP_SUB] = "-", + [OP_MUL] = "*", + [OP_AND] = "&", + [OP_OR] = "|", + [OP_XOR] = "^", + }; + int i; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n", i, i, ops[op], i); + + puts("\treturn vec1;"); + + (void)bits; +} + +static void op_builtin_avg_pbody(int op, int type, int bits, int size) +{ + int i; + + switch (type) { + case TYPE_INT: + printf("\t"); + gen_print_stype(type, bits); + printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n"); + + for (i = 0; i < size; i++) + printf( + "\tx_d_rem = (vec1.generic[%d] % 2);\n" + "\ty_d_rem = (vec2.generic[%d] % 2);\n" + "\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n" + "\trem_d_rem = ((x_d_rem + y_d_rem) % 2);\n" + "\n" + "\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n" + , i, i, i, i, i); + break; + case TYPE_UINT: + for (i = 0; i < size; i++) + printf("vec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i); + break; + case TYPE_FLOAT: + /* this is probably fine. */ + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i); + break; + } + + printf("\treturn vec1;\n"); +} + +static void op_builtin_not_pbody(int op, int type, int bits, int size) +{ + int i; + + for (i = 0; i < size; i++) + printf("\tvec.generic[%d] = ~vec.generic[%d];\n", i, i); + + puts("\treturn vec;"); +} + +static void op_builtin_shift_pbody(int op, int type, int bits, int size) +{ + int i; + + switch (type) { + case TYPE_UINT: { + const char *ops[] = { + [OP_LSHIFT] = "<<", + [OP_RSHIFT] = ">>", + [OP_LRSHIFT] = ">>", + }; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op]); + break; + } + case TYPE_INT: { + switch (op) { + case OP_LSHIFT: + case OP_LRSHIFT: { + const char *ops[] = { + [OP_LSHIFT] = "<<", + [OP_LRSHIFT] = ">>", + }; + + printf("\tunion { "); + gen_print_stype(TYPE_UINT, bits); + printf(" u; "); + gen_print_stype(TYPE_INT, bits); + puts(" s; } x;\n"); + + for (i = 0; i < size; i++) + printf( + "\tx.s = vec1.generic[%d];\n" + "\tx.u %s= vec2.generic[%d];\n" + "\tvec1.generic[%d] = x.s;\n", + i, ops[op], i, i); + break; + } + case OP_RSHIFT: + for (i = 0; i < size; i++) + printf("vec1.generic[%d] = ((~vec1.generic[%d]) >> vec2.generic[%d]);\n", i, i, i); + break; + } + break; + } + } + + puts("\treturn vec1;"); +} + +static void op_builtin_nonzero_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_DIV] = "/", + [OP_MOD] = "%", + }; + int i; + + if (op == OP_MOD && type == TYPE_FLOAT) { + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n", i, i, i, i); + } else { + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n", i, i, i, ops[op], i); + } + + puts("\treturn vec1;"); + + (void)bits; +} + +static void op_cmp_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_CMPLT] = "<", + [OP_CMPLE] = "<=", + [OP_CMPEQ] = "==", + [OP_CMPGE] = ">=", + [OP_CMPGT] = ">", + }; + int i; + + /* this is portable for int uint and float*/ + for (i = 0; i < size; i++) + printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n", i, i, ops[op], i, bits / 8); + + puts("\treturn vec1;"); +} + +static void op_minmax_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_MIN] = "<", + [OP_MAX] = ">", + }; + int i; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? (vec1.generic[%d]) : (vec2.generic[%d]);\n", i, i, ops[op], i, i, i); + + puts("\treturn vec1;"); +} + +static void op_splat_pbody(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + for (i = 0; i < size; i++) + printf("\tvec.generic[%d] = x;\n", i); + + puts("\treturn vec;"); +} + +static void op_load_pbody(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + printf("\tmemcpy(vec.generic, x, %d);\n", (bits / 8) * size); + + puts("\treturn vec;"); +} + +static void op_store_pbody(int op, int type, int bits, int size) +{ + printf("\tmemcpy(x, vec.generic, %d);\n", (bits / 8) * size); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {NULL, NULL, op_splat_pbody}, + [OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody}, + [OP_LOAD] = {NULL, NULL, op_load_pbody}, + [OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody}, + [OP_STORE] = {NULL, NULL, op_store_pbody}, + + /* arithmetic */ + [OP_ADD] = {NULL, NULL, op_builtin_pbody}, + [OP_SUB] = {NULL, NULL, op_builtin_pbody}, + [OP_MUL] = {NULL, NULL, op_builtin_pbody}, + [OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody}, + [OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody}, + [OP_AVG] = {NULL, NULL, op_builtin_avg_pbody}, + + /* bitwise */ + [OP_AND] = {NULL, NULL, op_builtin_pbody}, + [OP_OR] = {NULL, NULL, op_builtin_pbody}, + [OP_XOR] = {NULL, NULL, op_builtin_pbody}, + [OP_NOT] = {NULL, NULL, op_builtin_not_pbody}, + + /* min/max */ + [OP_MIN] = {NULL, NULL, op_minmax_pbody}, + [OP_MAX] = {NULL, NULL, op_minmax_pbody}, + + /* bitshift */ + [OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + [OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + [OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + + /* comparison */ + [OP_CMPLT] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPLE] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPEQ] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPGE] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPGT] = {NULL, NULL, op_cmp_pbody}, +}; + +int main(void) +{ + gen(op_impl, "generic"); + + return 0; +}