Mercurial > vec
diff gen/genaltivec.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/genaltivec.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,250 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +/* ------------------------------------------------------------------------ */ + +/* #define USE_VSX_EXTENSIONS */ +/* #define USE_POWER8_EXTENSIONS */ + +static int altivec_check(int op, int type, int bits, int size) +{ + switch (bits) { + case 8: + case 16: + case 32: +#ifdef USE_VSX_EXTENSIONS + case 64: +# ifndef USE_POWER8_EXTENSIONS + /* VSX has double, but not int64 */ + if ((bits == 64) && (type != TYPE_FLOAT)) + return 0; +# endif +#endif + if (bits * size == 128) + return 1; + default: + break; + } + + return 0; +} + +static int altivec_check_int(int op, int type, int bits, int size) +{ + return (altivec_check(op, type, bits, size) && type != TYPE_FLOAT); +} + +static int altivec_check_float(int op, int type, int bits, int size) +{ + return (altivec_check(op, type, bits, size) && type == TYPE_FLOAT); +} + +static void altivec_ppcheck(int op, int type, int bits, int size) +{ + /* old gcc had a broken partial implementation + * (why even bother adding it at all?) */ + switch (op) { + case OP_MUL: printf("defined(vec_mul)"); break; + case OP_SPLAT: printf("defined(vec_splats)"); break; + } +} + +static void altivec_splat(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_splats(x);"); + puts("\treturn vec;"); +} + +static void altivec_load(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));"); + puts("\treturn vec;"); +} + +static void altivec_load_aligned(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_ld(0, x);"); + puts("\treturn vec;"); +} + +static void altivec_store_aligned(int op, int type, int bits, int size) +{ + puts("\tvec_st(vec.altivec, 0, x);"); +} + +/* no store? */ + +static void altivec_print_native_type(int type, int bits) +{ + /* WITH DIRECTION AND MAGNITUDE! */ + printf("vector "); + + switch (type) { + case TYPE_INT: + printf("signed "); + break; + case TYPE_UINT: + printf("unsigned "); + break; + case TYPE_FLOAT: + /* nothing */ + break; + } + + switch (type) { + case TYPE_INT: + case TYPE_UINT: + switch (bits) { + case 8: printf("char"); break; + case 16: printf("short"); break; + case 32: printf("int"); break; + case 64: printf("long long"); break; + default: break; + } + break; + case TYPE_FLOAT: + switch (bits) { + case 32: printf("float"); break; + case 64: printf("double"); break; + default: break; + } + } +} + +static void altivec_2op(int op, int type, int bits, int size) +{ + static const char *op_altivec[] = { + [OP_ADD] = "add", + [OP_SUB] = "sub", + [OP_MUL] = "mul", + [OP_DIV] = "div", + [OP_MOD] = "mod", + [OP_AND] = "and", + [OP_OR] = "or", + [OP_XOR] = "xor", + [OP_CMPLT] = "cmplt", + [OP_CMPEQ] = "cmpeq", + [OP_CMPGT] = "cmpgt", + [OP_CMPLE] = "cmple", + [OP_CMPGE] = "cmpge", + [OP_MIN] = "min", + [OP_MAX] = "max", + [OP_AVG] = "avg", + [OP_LSHIFT] = "sl", + [OP_LRSHIFT] = "sr", + }; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + if (op == OP_RSHIFT) { + printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (type == TYPE_INT) ? "a" : ""); + } else { + printf("\tvec.altivec = ("); + altivec_print_native_type(type, bits); + printf(")vec_%s(vec1.altivec, vec2.altivec);\n", op_altivec[op]); + } + + puts("\treturn vec;"); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {altivec_check, NULL, altivec_splat}, + [OP_LOAD_ALIGNED] = {altivec_check, NULL, altivec_load_aligned}, + [OP_LOAD] = {altivec_check, NULL, altivec_load}, + [OP_STORE_ALIGNED] = {altivec_check, NULL, altivec_store_aligned}, + + /* arithmetic */ + [OP_ADD] = {altivec_check, NULL, altivec_2op}, + [OP_SUB] = {altivec_check, NULL, altivec_2op}, + [OP_MUL] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + /* GCC fails to compile integer division, so limit to floats */ + [OP_DIV] = {altivec_check_float, NULL, altivec_2op}, +#endif +#if 0 + /* This is Power10. I don't have any Power10 hardware :) + * (well, I also don't have any VSX hardware. whatever) */ + [OP_MOD] = {altivec_check_int, NULL, altivec_2op}, +#endif + [OP_AVG] = {altivec_check_int, NULL, altivec_2op}, + + /* bitwise */ + [OP_AND] = {altivec_check, NULL, altivec_2op}, + [OP_OR] = {altivec_check, NULL, altivec_2op}, + [OP_XOR] = {altivec_check, NULL, altivec_2op}, + + /* min/max */ + [OP_MIN] = {altivec_check, NULL, altivec_2op}, + [OP_MAX] = {altivec_check, NULL, altivec_2op}, + + /* bitshift */ + [OP_LSHIFT] = {altivec_check, NULL, altivec_2op}, + [OP_LRSHIFT] = {altivec_check, NULL, altivec_2op}, + [OP_RSHIFT] = {altivec_check, NULL, altivec_2op}, + + /* comparison */ + [OP_CMPLT] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + [OP_CMPLE] = {altivec_check, NULL, altivec_2op}, +#endif + [OP_CMPEQ] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + [OP_CMPGE] = {altivec_check, NULL, altivec_2op}, +#endif + [OP_CMPGT] = {altivec_check, NULL, altivec_2op}, +}; + + +int main(void) +{ + gen(op_impl, +#ifdef USE_POWER8_EXTENSIONS + "power8" +#elif defined(USE_VSX_EXTENSIONS) + "vsx" +#else + "altivec" +#endif + ); +}