Mercurial > vec
diff gen/gengcc.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/gengcc.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,219 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +static int gcc_minmax_only_integer(int op, int type, int bits, int size) +{ + return (type == TYPE_INT || type == TYPE_UINT); +} + +/* ------------------------------------------------------------------------ */ + +static void pp_gcc_prereq_4_3_0(int op, int type, int bits, int size) +{ + printf("VEC_GNUC_ATLEAST(4, 3, 0)"); +} + +static void gcc_print_easy_op(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_ADD] = "+", + [OP_SUB] = "-", + [OP_MUL] = "*", + [OP_DIV] = "/", + [OP_MOD] = "%", + [OP_AND] = "&", + [OP_OR] = "|", + [OP_XOR] = "^", + [OP_CMPLT] = "<", + [OP_CMPLE] = "<=", + [OP_CMPEQ] = "==", + [OP_CMPGE] = ">=", + [OP_CMPGT] = ">", + }; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\treturn vec1;\n"); +} + +static void gcc_print_splat(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + for (i = 0; i < size; i++) + printf("\tvec.gcc[%d] = x;\n", i); + printf("\treturn vec;\n"); +} + +static void gcc_print_load_aligned(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;"); + printf("\treturn vec;\n"); +} + +static void gcc_print_load(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + puts("\tmemcpy(&vec, x, sizeof(vec));"); + printf("\treturn vec;\n"); +} + +static void gcc_print_store_aligned(int op, int type, int bits, int size) +{ + puts("\t*(__typeof__(vec.gcc) *)x = vec.gcc;"); +} + +static void gcc_print_store(int op, int type, int bits, int size) +{ + puts("\tmemcpy(x, &vec, sizeof(vec));"); +} + +static void gcc_print_rorlshift(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_LSHIFT] = "<<", + [OP_RSHIFT] = ">>", + }; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\treturn vec1;\n"); +} + +static void gcc_print_lrshift(int op, int type, int bits, int size) +{ + printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8); + printf("\treturn vec1;\n"); +} + +static void gcc_print_minmax(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_MIN] = "<", + [OP_MAX] = ">" + }; + + switch (type) { + case TYPE_INT: + case TYPE_UINT: + /* yay */ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" mask;\n"); + printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); + printf("\treturn vec1;\n"); + break; + default: + /* hm? */ + break; + } +} + +static void gcc_print_avg(int op, int type, int bits, int size) +{ + switch (type) { + case TYPE_INT: + printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); + puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);"); + puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);"); + puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);"); + puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);"); + puts(""); + printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); + break; + case TYPE_UINT: + printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n"); + break; + case TYPE_FLOAT: + printf("\tvec1.gcc = (vec1.gcc + vec2.gcc) / 2;\n"); + break; + } + + printf("\treturn vec1;\n"); +} + +static void gcc_print_not(int op, int type, int bits, int size) +{ + printf("\tvec.gcc = ~vec.gcc;\n"); + printf("\treturn vec;\n"); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {NULL, NULL, gcc_print_splat}, + [OP_LOAD_ALIGNED] = {NULL, NULL, gcc_print_load_aligned}, + [OP_LOAD] = {NULL, NULL, gcc_print_load}, + [OP_STORE_ALIGNED] = {NULL, NULL, gcc_print_store_aligned}, + [OP_STORE] = {NULL, NULL, gcc_print_store}, + + /* arithmetic */ + [OP_ADD] = {NULL, NULL, gcc_print_easy_op}, + [OP_SUB] = {NULL, NULL, gcc_print_easy_op}, + [OP_MUL] = {NULL, NULL, gcc_print_easy_op}, +#if 0 + /* no defined divide by zero behavior */ + [OP_DIV] = {NULL, NULL, gcc_print_easy_op}, + [OP_MOD] = {NULL, NULL, gcc_print_easy_op}, +#endif + [OP_AVG] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_avg}, + + /* bitwise */ + [OP_AND] = {NULL, NULL, gcc_print_easy_op}, + [OP_OR] = {NULL, NULL, gcc_print_easy_op}, + [OP_XOR] = {NULL, NULL, gcc_print_easy_op}, + [OP_NOT] = {NULL, NULL, gcc_print_not}, + + /* min/max */ + [OP_MIN] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, + [OP_MAX] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, + + /* bitshift */ + [OP_LSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, + [OP_LRSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_lrshift}, + [OP_RSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, + + /* comparison */ + [OP_CMPLT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPLE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPEQ] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPGE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPGT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, +}; + +int main(void) +{ + gen(op_impl, "gcc"); +}