Mercurial > vec
view gen/gengcc.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
line wrap: on
line source
/**
 * vec - a tiny SIMD vector library in C99
 *
 * Copyright (c) 2024-2025 Paper
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
**/

#include "genlib.h"

#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))

/*
 * Filter used for the min/max entries of the operation table: returns
 * nonzero only for signed and unsigned integer element types, which are
 * the only types gcc_print_minmax() emits code for.
 */
static int gcc_minmax_only_integer(int op, int type, int bits, int size)
{
	return (type == TYPE_INT || type == TYPE_UINT);
}

/* ------------------------------------------------------------------------ */

/*
 * Prints the preprocessor condition that requires GCC 4.3.0 or newer.
 * No trailing newline is written.
 */
static void pp_gcc_prereq_4_3_0(int op, int type, int bits, int size)
{
	printf("VEC_GNUC_ATLEAST(4, 3, 0)");
}

/* Returns nonzero when `op` is one of the element-wise comparison ops. */
static int gcc_op_is_comparison(int op)
{
	switch (op) {
	case OP_CMPLT:
	case OP_CMPLE:
	case OP_CMPEQ:
	case OP_CMPGE:
	case OP_CMPGT:
		return 1;
	default:
		return 0;
	}
}

/*
 * Emits the body of a binary operation that maps directly onto a C
 * operator applied to the two `.gcc` vectors.
 *
 * `op` must be one of the operations listed in the table below.
 *
 * GCC documents the result of a vector comparison as a vector with a
 * signed integer element type. Assigning that to an unsigned or floating
 * point vector is rejected unless lax vector conversions are enabled, so
 * comparison results are cast back to the operand type; this is the same
 * cast gcc_print_lrshift() applies to its result. For signed integer
 * vectors the cast changes nothing.
 */
static void gcc_print_easy_op(int op, int type, int bits, int size)
{
	static const char *const op_builtins[] = {
		[OP_ADD] = "+",
		[OP_SUB] = "-",
		[OP_MUL] = "*",
		[OP_DIV] = "/",
		[OP_MOD] = "%",
		[OP_AND] = "&",
		[OP_OR] = "|",
		[OP_XOR] = "^",
		[OP_CMPLT] = "<",
		[OP_CMPLE] = "<=",
		[OP_CMPEQ] = "==",
		[OP_CMPGE] = ">=",
		[OP_CMPGT] = ">",
	};
	const char *c_operator = op_builtins[op];

	if (gcc_op_is_comparison(op)) {
		printf("\tvec1.gcc = (__typeof__(vec1.gcc))(vec1.gcc %s vec2.gcc);\n", c_operator);
	} else {
		printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", c_operator);
	}

	printf("\treturn vec1;\n");
}

/*
 * Emits a splat body: declares a vector of the requested type, assigns
 * `x` to each of its `size` lanes one by one, and returns it.
 */
static void gcc_print_splat(int op, int type, int bits, int size)
{
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	for (int lane = 0; lane < size; lane++) {
		printf("\tvec.gcc[%d] = x;\n", lane);
	}

	printf("\treturn vec;\n");
}

/*
 * Emits an aligned load body: `x` is dereferenced through a pointer to
 * the native vector type, so the emitted code relies on `x` being
 * suitably aligned for that type.
 */
static void gcc_print_load_aligned(int op, int type, int bits, int size)
{
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;");
	printf("\treturn vec;\n");
}

/* Emits a load body that copies from `x` into the vector with memcpy(). */
static void gcc_print_load(int op, int type, int bits, int size)
{
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	puts("\tmemcpy(&vec, x, sizeof(vec));");
	printf("\treturn vec;\n");
}

/*
 * Emits an aligned store body: the vector is written through `x` cast to
 * a pointer to the native vector type.
 */
static void gcc_print_store_aligned(int op, int type, int bits, int size)
{
	puts("\t*(__typeof__(vec.gcc) *)x = vec.gcc;");
}

/* Emits a store body that copies the vector out to `x` with memcpy(). */
static void gcc_print_store(int op, int type, int bits, int size)
{
	puts("\tmemcpy(x, &vec, sizeof(vec));");
}

/*
 * Emits the body for OP_LSHIFT and OP_RSHIFT using the plain `<<` and
 * `>>` operators on the `.gcc` vectors.
 */
static void gcc_print_rorlshift(int op, int type, int bits, int size)
{
	static const char *const op_builtins[] = {
		[OP_LSHIFT] = "<<",
		[OP_RSHIFT] = ">>",
	};

	printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]);
	printf("\treturn vec1;\n");
}

/*
 * Emits the body for a logical right shift: vec1 is cast to an unsigned
 * vector of the same element width (total size bits * size / 8 bytes)
 * before shifting, and the result is cast back to vec1's own type.
 */
static void gcc_print_lrshift(int op, int type, int bits, int size)
{
	printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8);
	printf("\treturn vec1;\n");
}

/*
 * Emits the body for OP_MIN / OP_MAX on integer vectors: a comparison
 * mask selects lanes from vec1, and its complement selects lanes from
 * vec2. Nothing is emitted for any other element type.
 *
 * The comparison result is cast to the mask's vector type because GCC
 * documents vector comparisons as yielding a signed integer vector, which
 * does not implicitly convert to an unsigned vector without lax vector
 * conversions. For signed vectors the cast changes nothing.
 */
static void gcc_print_minmax(int op, int type, int bits, int size)
{
	static const char *const op_builtins[] = {
		[OP_MIN] = "<",
		[OP_MAX] = ">"
	};

	if (type != TYPE_INT && type != TYPE_UINT) {
		/* hm? */
		return;
	}

	/* yay */
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" mask;\n");

	printf("\tmask.gcc = (__typeof__(mask.gcc))(vec1.gcc %s vec2.gcc);\n", op_builtins[op]);
	printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n");
	printf("\treturn vec1;\n");
}

/*
 * Emits the body for OP_AVG.
 *
 * - signed: both inputs are halved separately and the remainders are
 *   combined afterwards (presumably so the intermediate sum cannot
 *   overflow the element type).
 * - unsigned: both inputs are shifted right by one and 1 is added when
 *   either input has its low bit set.
 * - float: the plain sum divided by two.
 *
 * For any other type only the `return vec1;` line is emitted.
 */
static void gcc_print_avg(int op, int type, int bits, int size)
{
	switch (type) {
	case TYPE_INT:
		printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size);
		puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);");
		puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);");
		puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);");
		puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);");
		puts("");
		printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n");
		break;
	case TYPE_UINT:
		printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n");
		break;
	case TYPE_FLOAT:
		printf("\tvec1.gcc = (vec1.gcc + vec2.gcc) / 2;\n");
		break;
	default:
		break;
	}

	printf("\treturn vec1;\n");
}

/* Emits the body for OP_NOT: bitwise complement of the `.gcc` vector. */
static void gcc_print_not(int op, int type, int bits, int size)
{
	printf("\tvec.gcc = ~vec.gcc;\n");
	printf("\treturn vec;\n");
}

/* ------------------------------------------------------------------------ */

/*
 * Operation table handed to gen(). Each entry is, in order: a type filter
 * (NULL or gcc_minmax_only_integer), a preprocessor prerequisite printer
 * (NULL or pp_gcc_prereq_4_3_0), and the function that prints the body.
 * NOTE(review): the field meanings are inferred from the callbacks used
 * here; confirm against struct op_impl in genlib.h.
 */
static struct op_impl op_impl[OP_FINAL_] = {
	[OP_SPLAT] = {NULL, NULL, gcc_print_splat},
	[OP_LOAD_ALIGNED] = {NULL, NULL, gcc_print_load_aligned},
	[OP_LOAD] = {NULL, NULL, gcc_print_load},
	[OP_STORE_ALIGNED] = {NULL, NULL, gcc_print_store_aligned},
	[OP_STORE] = {NULL, NULL, gcc_print_store},

	/* arithmetic */
	[OP_ADD] = {NULL, NULL, gcc_print_easy_op},
	[OP_SUB] = {NULL, NULL, gcc_print_easy_op},
	[OP_MUL] = {NULL, NULL, gcc_print_easy_op},
#if 0 /* no defined divide by zero behavior */
	[OP_DIV] = {NULL, NULL, gcc_print_easy_op},
	[OP_MOD] = {NULL, NULL, gcc_print_easy_op},
#endif
	[OP_AVG] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_avg},

	/* bitwise */
	[OP_AND] = {NULL, NULL, gcc_print_easy_op},
	[OP_OR] = {NULL, NULL, gcc_print_easy_op},
	[OP_XOR] = {NULL, NULL, gcc_print_easy_op},
	[OP_NOT] = {NULL, NULL, gcc_print_not},

	/* min/max */
	[OP_MIN] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax},
	[OP_MAX] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax},

	/* bitshift */
	[OP_LSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift},
	[OP_LRSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_lrshift},
	[OP_RSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift},

	/* comparison */
	[OP_CMPLT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
	[OP_CMPLE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
	[OP_CMPEQ] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
	[OP_CMPGE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
	[OP_CMPGT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
};

/* Runs the shared generator over the table above with the name "gcc". */
int main(void)
{
	gen(op_impl, "gcc");
	return 0;
}